Code example #1
    def __init__(self, **kwargs):
        agent_type = kwargs.setdefault('agent_type', None)
        referer = kwargs.setdefault('referer', None)
        self.headers = kwargs.setdefault(
            'headers',
            HeaderGenerator().header(agent_type=agent_type, referer=referer))
        self.timeout = kwargs.setdefault('timeout', 10)
        self.resp_attributes = kwargs.setdefault('resp_attributes', [
            'content', 'encoding', 'headers', 'history', 'html', 'json', 'ok',
            'reason', 'status_code', 'text', 'url'
        ])
        self.elem_attributes = kwargs.setdefault('elem_attributes', [
            'absolute_links', 'base_url', 'encoding', 'full_text', 'html',
            'links', 'raw_html', 'text', 'url'
        ])
        self._logging = kwargs.setdefault('logging', True)
        if self._logging:
            log_path = kwargs.setdefault('log_path', '.')
            log_file = kwargs.setdefault('log_file', 'out.log')
            log_name = kwargs.setdefault('log_name', __name__)
            self._logger = Logger(name=log_name,
                                  log_path=log_path,
                                  log_file=log_file)
        self.session = requests_html.HTMLSession()
        self._err_recs = []
Code example #2
    def __init__(self, path, attributes, name='table', verbosity=0):

        self.WRITING_REQUESTS = []
        self.READING_REQUESTS = {}
        self.UPDATE_REQUESTS = []
        Logger.__init__(self, 'SQLite interface', verbosity=verbosity)

        self.db_path = 'sqlite:///%s/search_progress.db' % path
        self.attributes = attributes
        self.name = name

        self.log('creating database %s at %s' % (self.name, self.db_path),
                 'DEBUG')

        # create database
        self.db = sql.create_engine(self.db_path)
        self.db.echo = False
        self.metadata = sql.MetaData(self.db)

        # create table in database
        self.table = sql.Table(self.name, self.metadata)
        for name, att_type in self.attributes.items():
            self.table.append_column(
                sql.Column(name, self.SQLITE_COLUMNS[att_type]))
        self.table.create(checkfirst=True)

        # start request processor
        self._process_requests()
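A minimal usage sketch for the constructor above. The class and module names are not shown in the excerpt, so both are assumptions here, as is the type vocabulary accepted by SQLITE_COLUMNS:

# Usage sketch (hypothetical names): assumes the class above is importable as
# SQLiteDatabase and that SQLITE_COLUMNS maps 'float'/'string' to SQLAlchemy types.
from db_interface import SQLiteDatabase  # hypothetical module/class names

attributes = {'param_0': 'float', 'param_1': 'float', 'status': 'string'}
db = SQLiteDatabase(path='.', attributes=attributes, name='table', verbosity=1)
# the constructor creates ./search_progress.db with one column per attribute
# and then starts the request-processing loop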
Code example #3
def handleRequest(command, args):
	respMsg = {'type':'response', 'command':command, 'data':None}

	if command == 'scanWifi':
		println("Command recevied to scan wifi")
		apInfo = Wifi.findMhaDevices()
		if len(apInfo) < 1:
			Logger.log(Logger.LogLevel.error, "Unable to find any MHA Devices in accespoint mode")
		else:
			data = []
			for ap in apInfo:
				data.append({'macaddr': Wifi.getMacAddr(ap), 'ssid':Wifi.getSsid(ap)})
			respMsg['data'] = data
			Logger.log(Logger.LogLevel.info, "New APs:" + str(data))
		publishMessage(json.dumps(respMsg))

	if command == 'connectToWifi':
		print('Command received to connect to Wifi')
		if 'ssid' in args:
			ssid = args['ssid']
			macaddr = ''
			if 'macaddr' in args:
				macaddr = args['macaddr']
			status = {'connectedToWifi': 'True'}
			if connectToWifi(ssid, macaddr) != 0:
				status['connectedToWifi'] = 'False'
			respMsg['data'] = status
			publishMessage(json.dumps(respMsg))
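A short dispatch sketch for the handler above; the payload layout mirrors the fields handleRequest reads, and the JSON string itself is illustrative only:

# Illustrative dispatch -- the payload values are placeholders
import json

raw = '{"command": "connectToWifi", "args": {"ssid": "home-ap", "macaddr": "aa:bb:cc:dd:ee:ff"}}'
msg = json.loads(raw)
handleRequest(msg['command'], msg.get('args', {}))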
Code example #4
    def __init__(self,
                 mongo_address,
                 model_name,
                 total_indexes,
                 current_index,
                 n_neighbors=5,
                 algorithm="brute",
                 metric="euclidean"):
        self.logger = Logger()
        client = MongoClient(mongo_address)
        self.db = client.features

        self.records = self.load_data(model_name, total_indexes, current_index)
        features = list(map(lambda x: x["feature"], self.records))
        urls = list(map(lambda x: x["url"], self.records))
        self.logger.info(
            f"Indexing {len(urls)} records for model {model_name} using algorithm {algorithm}"
        )
        self.neighbors = None
        if len(self.records):
            self.neighbors = NearestNeighbors(
                n_neighbors=min(n_neighbors, len(self.records)),
                algorithm=algorithm,
                metric=metric).fit(features)
            self.logger.info(f"Done indexing {len(self.records)} records")
Code example #5
	def __init__(self, config):
	
		self.config = config
		Logger.__init__(self, 'Acquisition', self.config.get('verbosity'))
		self.random_sampler   = RandomSampler(self.config.general, self.config.parameters)
		self.total_num_vars   = len(self.config.feature_names)
		self.local_optimizers = None		
		self.num_cpus         = multiprocessing.cpu_count()
Code example #6
    def __init__(self, attributes, entries=None, verbosity=0):
        Logger.__init__(self, 'DB_Cache', verbosity=verbosity)
        self.attributes = attributes

        self.cache = {attr: [] for attr in self.attributes}
        self.num_items = 0
        for entry in entries or []:
            self.add(entry)
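A small usage sketch, assuming each entry is a dict keyed by the attribute names and that the add method (called in the constructor but not shown in this excerpt) appends one value per attribute:

# Usage sketch -- the entry layout is an assumption based on the cache structure above
attributes = ['param_0', 'param_1', 'objective']
entries = [{'param_0': 0.1, 'param_1': 0.9, 'objective': 1.2}]
cache = DB_Cache(attributes, entries=entries, verbosity=2)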
Code example #7
    def __init__(self, config):
        self.config = config
        self.chimera = Chimera(self.config.obj_tolerances,
                               self.config.get('softness'))
        Logger.__init__(self,
                        'ObservationProcessor',
                        verbosity=self.config.get('verbosity'))

        # compute some boundaries
        self.feature_lowers = self.config.feature_lowers
        self.feature_uppers = self.config.feature_uppers
        self.soft_lower = self.feature_lowers + 0.1 * (self.feature_uppers -
                                                       self.feature_lowers)
        self.soft_upper = self.feature_uppers - 0.1 * (self.feature_uppers -
                                                       self.feature_lowers)
Code example #8
	def __init__(self, config):
		self.config = config
		Logger.__init__(self, 'ParamOptimizer', verbosity = self.config.get('verbosity'))

		# parse positions
		self.pos_continuous = np.full(self.config.num_features, False, dtype = bool)
		for feature_index, feature_type in enumerate(self.config.feature_types):
			self.pos_continuous[feature_index] = True

		# set up continuous optimization algorithms
		cont_opt_name = self.config.get('continuous_optimizer')
		if cont_opt_name == 'adam':
			from Acquisition.NumpyOptimizers import AdamOptimizer
			self.opt_con = AdamOptimizer()
		else:
			PhoenicsUnknownSettingsError('did not understand continuous optimizer "%s".\n\tPlease choose from "adam"' % cont_opt_name)
Code example #9
	def __init__(self, config_general, config_params):
		self.config_general = config_general
		self.config_params  = config_params
		verbosity           = self.config_general.verbosity
		if 'random_sampler' in self.config_general.verbosity:
			verbosity = self.config_general.verbosity['random_sampler']
		Logger.__init__(self, 'RandomSampler', verbosity)

		if self.config_general.sampler == 'sobol':
			from RandomSampler.sobol   import SobolContinuous
			self.continuous_sampler  = SobolContinuous()
		elif self.config_general.sampler == 'uniform':
			from RandomSampler.uniform import UniformContinuous
			self.continuous_sampler  = UniformContinuous()
		else:
			PhoenicsUnknownSettingsError('did not understand sampler setting: "%s".\n\tChoose from "uniform" or "sobol"' % self.config_general.sampler)
Code example #10
File: WebDriver.py Project: edgarnav/selenium_test
class WebDriver:

    log = Log.func_logger()

    def init_driver(self):
        self.log.info("Opening in Chrome")
        return webdriver.Chrome("/Users/edgarnav/Documents/chromedriver")
Code example #11
    def __init__(self, config, model_details=None):

        self.COUNTER = 0
        self.has_sampled = False
        self.config = config
        verbosity = self.config.get('verbosity')
        if 'bayesian_network' in verbosity:
            verbosity = verbosity['bayesian_network']
        Logger.__init__(self, 'BayesianNetwork', verbosity=verbosity)
        self.kernel_contribution = lambda x: (np.sum(x), 1.)

        # get bnn model details
        if model_details is None:
            from BayesianNetwork.model_details import model_details
        self.model_details = model_details

        # set up bnn
        if self.config.get('backend') == 'tfprob':
            from BayesianNetwork.TfprobInterface import TfprobNetwork
            self.network_executable = '{}/BayesianNetwork/TfprobInterface/tfprob_interface.py'.format(
                self.config.get('home'))
        elif self.config.get('backend') == 'edward':
            from BayesianNetwork.EdwardInterface import EdwardNetwork
            self.network_executable = '%s/BayesianNetwork/EdwardInterface/edward_interface.py' % self.config.get(
                'home')
        else:
            PhoenicsUnknownSettingsError(
                'did not understand backend: "%s".\n\tChoose from "tfprob" or "edward"'
                % self.config.get('backend'))

        # get domain volume
        self.volume = 1.
        feature_lengths = self.config.feature_lengths
        feature_ranges = self.config.feature_ranges
        for feature_index, feature_type in enumerate(
                self.config.feature_types):
            self.volume *= feature_ranges[feature_index]
        self.inverse_volume = 1 / self.volume

        # compute sampling parameter values
        if self.config.get('sampling_strategies') == 1:
            self.sampling_param_values = np.zeros(1)
        else:
            self.sampling_param_values = np.linspace(
                -1.0, 1.0, self.config.get('sampling_strategies'))
            self.sampling_param_values = self.sampling_param_values[::-1]
        self.sampling_param_values *= self.inverse_volume
Code example #12
class Index:
    def __init__(self,
                 mongo_address,
                 model_name,
                 total_indexes,
                 current_index,
                 n_neighbors=5,
                 algorithm="brute",
                 metric="euclidean"):
        self.logger = Logger()
        client = MongoClient(mongo_address)
        self.db = client.features

        self.records = self.load_data(model_name, total_indexes, current_index)
        features = list(map(lambda x: x["feature"], self.records))
        urls = list(map(lambda x: x["url"], self.records))
        self.logger.info(
            f"Indexing {len(urls)} records for model {model_name} using algorithm {algorithm}"
        )
        self.neighbors = None
        if len(self.records):
            self.neighbors = NearestNeighbors(
                n_neighbors=min(n_neighbors, len(self.records)),
                algorithm=algorithm,
                metric=metric).fit(features)
            self.logger.info(f"Done indexing {len(self.records)} records")

    def load_data(self, model_name, total_indexes, current_index):
        total_num_of_records = self.db[model_name].count()
        num_of_records_per_index = int(total_num_of_records /
                                       max(total_indexes, 1))
        start_index = current_index * num_of_records_per_index
        self.logger.info(
            f"Load {num_of_records_per_index} / {total_num_of_records} records starting from index {start_index}"
        )
        return list(self.db[model_name].find(
            {}).skip(start_index).limit(num_of_records_per_index))

    def query(self, vector, size=10):
        if not self.neighbors:
            return []

        distances, indices = self.neighbors.kneighbors(
            [vector], min(len(self.records), size), return_distance=True)
        distances = distances[0]
        indices = indices[0]
        results = []
        for i in range(len(indices)):
            distance = distances[i]
            index = indices[i]
            url = self.records[index]["url"]
            results.append({"distance": distance, "url": url})
        return results

    def test(self):
        return self.records[1]["feature"]
Code example #13
    def __init__(self, params):
        queue_name = params["queue_name"]
        model_url = params["model_url"]
        host_name = params.get("rabbitmq_hostname", "localhost")
        mongo_address = params.get("mongo_address", "localhost:27017")
        self.bucket_name = params["bucket_name"]
        self.deduplicate_model = params["deduplicate_model"]
        self.deduplicate_threshold = params["deduplicate_threshold"]
        self.logger = Logger()

        while True:
            try:
                if self.set_up_rabbitmq_connection(host_name, queue_name):
                    break
            except Exception as e:
                self.logger.error(
                    f"Failed to connect to rabbitmq queue {queue_name} at {host_name}. Reason: {e}"
                )
                time.sleep(3)
                continue

        # worker configuration and model setup
        self.num_threads = 4
        self.model_name = queue_name

        self.bucket_handler = Bucket(self.bucket_name)

        self.logger.info(f"Extract worker for model: {queue_name}")
        self.model = model_picker(queue_name, model_url)

        self.logger.info(f"Connecting to mongodb at {mongo_address}")
        client = MongoClient(mongo_address)
        self.db = client.features

        # start consuming (blocks)
        self.channel.start_consuming()
        self.connection.close()
Code example #14
File: Wifi.py Project: emmathew/HomeAutomation
def scanWifi():
    availableWifiNetworks = []
    shcmd = 'sudo iwlist ' + WIFI_INTERFACE + ' scan'
    response = subprocess.run(shcmd.split(' '),
                              capture_output=True,
                              timeout=30)
    if response.returncode == 0:
        output = response.stdout.decode("utf-8").split('\n')
        macAddr = ''
        ssid = ''
        for line in output:
            if 'Address' in line:
                macAddr = line.split('Address:')[-1][1:-1]
            if 'ESSID' in line:
                ssid = line.split(':')[-1][1:-1]

            if macAddr != '' and ssid != '':
                availableWifiNetworks.append(WifiAp(macAddr, ssid))
                macAddr = ''
                ssid = ''

    else:
        Logger.log(Logger.LogLevel.error, "Wifi Scaning failed")
    return availableWifiNetworks
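A usage sketch; it assumes the module is importable as Wifi, as the file header suggests, and reads the scan results back through the getMacAddr/getSsid accessors used in code example #3:

# Usage sketch -- assumes this function lives in Wifi.py alongside the accessors
import Wifi

for ap in Wifi.scanWifi():
    print(Wifi.getMacAddr(ap), Wifi.getSsid(ap))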
Code example #15
File: ResultsPage.py Project: edgarnav/selenium_test
class ResultsPage(ElementsInteractions):
    def __init__(self, driver):
        super().__init__(driver)
        self.driver = driver

    log = Logger.func_logger()

    url_batman = "https://www.tvmaze.com/shows/975/batman"
    class_url = "white-text"
    class_back_btn = "btn-primary"

    def find_url_batman(self):
        elements = self.get_all_elements(self.class_url, "class")
        links = [element.get_attribute('href') for element in elements]
        index = links.index(self.url_batman)
        elements[index].click()

    def return_page(self):
        self.back_page()

    def click_back_btn(self):
        self.click_element(self.class_back_btn, "class")
Code example #16
class ElementsInteractions:

    log = Log.func_logger()

    def __init__(self, driver):
        self.driver = driver

    def locator(self, locator_type):
        if locator_type == "id":
            return By.ID
        elif locator_type == "name":
            return By.NAME
        elif locator_type == "class":
            return By.CLASS_NAME
        elif locator_type == "xpath":
            return By.XPATH
        elif locator_type == "tag":
            return By.TAG_NAME
        else:
            self.log.error("Locator Type : " + locator_type +
                           " entered is not found")
        return False

    def verify_activity(self, activity_name):
        cont_max_time = 0
        while cont_max_time < 10:
            time.sleep(1)
            cont_max_time += 1
            if activity_name == self.driver.current_activity:
                self.log.info("Activity name match with: " + activity_name)
                break
            elif activity_name != self.driver.current_activity and cont_max_time == 10:
                self.take_screenshot(self.driver.current_activity)
                self.log.info("Activity name expected: " + activity_name)
                assert False

    def explicit_wait(self, locator_value, locator_type, max_time):
        try:
            locator_by_type = self.locator(locator_type)
            WebDriverWait(self.driver, max_time).until(
                ec.presence_of_all_elements_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
            return True
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            return False

    def get_element(self, locator_value, locator_type):
        element = None
        try:
            locator_by_type = self.locator(locator_type)
            element = self.driver.find_element(locator_by_type, locator_value)
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
        return element

    def wait_element(self, locator_value, locator_type):
        try:
            locator_by_type = self.locator(locator_type)
            wait = WebDriverWait(self.driver,
                                 25,
                                 poll_frequency=1,
                                 ignored_exceptions=[
                                     ElementNotVisibleException,
                                     NoSuchElementException
                                 ])
            element = wait.until(
                ec.presence_of_element_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error("Element not found with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False
        return element

    def press_element(self, locator_value, locator_type):
        try:
            element = self.wait_element(locator_value, locator_type)
            element.click()
            self.log.info("Clicked on element with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to Click on element with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            assert False

    def send_text(self, locator_value, locator_type, text):
        try:
            element = self.wait_element(locator_value, locator_type)
            element.send_keys(text)
            self.log.info("Sent the text " + text +
                          " in element with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to Sent the text " + text +
                           " in element with locator value " + locator_value +
                           "using locatorType " + locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_text(self, locator_value, locator_type):
        element_text = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_text = element.text
            self.log.info("Got the text " + element_text +
                          " from element with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Unable to get the text from element with locator value " +
                locator_value + "using locatorType " + locator_type)
            print_stack()
        return element_text

    def get_attribute(self, locator_value, locator_type, attribute_name):
        attribute = None
        try:
            element = self.wait_element(locator_value, locator_type)
            attribute = element.get_attribute(attribute_name)
            self.log.info("Got the attribute " + attribute_name + " -> " +
                          attribute + " from element with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to get the attribute " + attribute_name +
                           " from element with locator value " +
                           locator_value + "using locatorType " + locator_type)
            print_stack()
        return attribute

    def is_element_displayed(self, locator_value, locator_type):
        element_displayed = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_displayed = element.is_displayed()
            self.log.info(
                "Element is Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Element is not Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()

        return element_displayed

    def take_screenshot(self, text):
        pass
Code example #17
class Worker:
    def __init__(self, params):
        queue_name = params["queue_name"]
        model_url = params["model_url"]
        host_name = params.get("rabbitmq_hostname", "localhost")
        mongo_address = params.get("mongo_address", "localhost:27017")
        self.bucket_name = params["bucket_name"]
        self.deduplicate_model = params["deduplicate_model"]
        self.deduplicate_threshold = params["deduplicate_threshold"]
        self.logger = Logger()

        while True:
            try:
                if self.set_up_rabbitmq_connection(host_name, queue_name):
                    break
            except Exception as e:
                self.logger.error(
                    f"Failed to connect to rabbitmq queue {queue_name} at {host_name}. Reason: {e}"
                )
                time.sleep(3)
                continue

        # worker configuration and model setup
        self.num_threads = 4
        self.model_name = queue_name

        self.bucket_handler = Bucket(self.bucket_name)

        self.logger.info(f"Extract worker for model: {queue_name}")
        self.model = model_picker(queue_name, model_url)

        self.logger.info(f"Connecting to mongodb at {mongo_address}")
        client = MongoClient(mongo_address)
        self.db = client.features

        # start consuming (blocks)
        self.channel.start_consuming()
        self.connection.close()

    def set_up_rabbitmq_connection(self, host_name, queue_name):
        credentials = pika.PlainCredentials('admin', 'admin')
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host_name, credentials=credentials))
        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=queue_name, durable=True)
        self.channel_name = "features"
        self.channel.exchange_declare(exchange=self.channel_name,
                                      exchange_type="fanout",
                                      durable=True)
        self.channel.queue_bind(exchange=self.channel_name, queue=queue_name)
        self.channel.basic_qos(prefetch_count=20)
        # set up subscription on the queue
        self.channel.basic_consume(queue_name, self.process)
        return True

    def get_public_url(self, file_name):
        return f"https://storage.googleapis.com/{self.bucket_name}/{file_name}"

    def check_duplication(self, img_name, feature):
        response = requests.post(
            f"http://serving-{self.deduplicate_model}:5000/search?json=true",
            json=feature.tolist())
        if response.status_code != 200:
            print(f"Deduplicate request fails for image {img_name}")
            return False
        result = response.json()

        if len(result) == 0:
            return False

        best_match = result[0]["distance"]
        is_duplicated = best_match <= self.deduplicate_threshold
        if is_duplicated:
            print(f"Image {img_name} already exists")
            self.channel.basic_publish(exchange="",
                                       routing_key="duplicated_files",
                                       body=img_name)
        return is_duplicated

    @FAILURE_COUNTER.count_exceptions()
    @REQUEST_TIME.time()
    def process(self, ch, method, properties, file_name):
        file_name = file_name.decode()
        print(f"Processing file {file_name}")
        downloaded_dir = "./tmp"
        local_file_path = self.bucket_handler.download(file_name,
                                                       downloaded_dir)
        feature = extract_features(local_file_path, self.model)

        if self.deduplicate_model:
            is_duplicated = self.check_duplication(file_name, feature)
            if is_duplicated:
                self.channel.basic_ack(delivery_tag=method.delivery_tag)
                return

        self.db[self.model_name].insert_one({
            "url":
            self.get_public_url(file_name),
            "feature":
            feature.tolist()
        })
        self.channel.basic_ack(delivery_tag=method.delivery_tag)
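A parameter sketch for starting a worker; every value below is a placeholder, and the constructor blocks in start_consuming once RabbitMQ and MongoDB are reachable:

# Usage sketch -- all connection details, bucket and model names are placeholders
params = {
    "queue_name": "resnet50",
    "model_url": "https://example.com/models/resnet50.h5",
    "rabbitmq_hostname": "localhost",
    "mongo_address": "localhost:27017",
    "bucket_name": "my-image-bucket",
    "deduplicate_model": "",  # empty string disables the duplicate check in process()
    "deduplicate_threshold": 0.05,
}
Worker(params)  # blocks, consuming file names published to the queue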
Code example #18
class Crawler(object):
    """Provides utilities for retrieving website content."""
    def __init__(self, **kwargs):
        agent_type = kwargs.setdefault('agent_type', None)
        referer = kwargs.setdefault('referer', None)
        self.headers = kwargs.setdefault(
            'headers',
            HeaderGenerator().header(agent_type=agent_type, referer=referer))
        self.timeout = kwargs.setdefault('timeout', 10)
        self.resp_attributes = kwargs.setdefault('resp_attributes', [
            'content', 'encoding', 'headers', 'history', 'html', 'json', 'ok',
            'reason', 'status_code', 'text', 'url'
        ])
        self.elem_attributes = kwargs.setdefault('elem_attributes', [
            'absolute_links', 'base_url', 'encoding', 'full_text', 'html',
            'links', 'raw_html', 'text', 'url'
        ])
        self._logging = kwargs.setdefault('logging', True)
        if self._logging:
            log_path = kwargs.setdefault('log_path', '.')
            log_file = kwargs.setdefault('log_file', 'out.log')
            log_name = kwargs.setdefault('log_name', __name__)
            self._logger = Logger(name=log_name,
                                  log_path=log_path,
                                  log_file=log_file)
        self.session = requests_html.HTMLSession()
        self._err_recs = []

    def _push_error(self, error, url, comp_id=None, attr=None):
        c_id = str(comp_id) if comp_id else comp_id
        if self._logging:
            if c_id:
                msg = ('\nRequest for response from {} for company {} ' +
                       'threw exception: {}\n').format(url, c_id, error)
            elif attr:
                msg = (
                    '\nRequest for "{}" from {} threw exception: {}\n'.format(
                        attr, url, error))
            else:
                msg = ('\nRequest for response from {} threw exception: {}\n'.
                       format(url, error))
            self._logger.error(msg)
        self._err_recs.append({
            'time': strftime('%Y-%m-%d %H:%M:%S'),
            'company_profile_id': c_id,
            'attribute': attr,
            'url': url,
            'exception': error
        })

    @error_trap
    def _get_response(self, url, headers, timeout, cookies):

        r = self.session.get(url,
                             headers=headers,
                             timeout=timeout,
                             cookies=cookies)
        if r is None:
            return None, None, url
        else:
            if r.ok:
                if self._logging:
                    self._logger.info(('\nOrig_URL: {}; Ret_URL: {}; ' +
                                       'status: {}, reason: {}\n').format(
                                           url, r.url, r.status_code,
                                           r.reason))
                return r, None, r.url
            else:
                if self._logging:
                    self._logger.warning(('\nOrig_URL: {}; Ret_URL: {}; ' +
                                          'status: {}, reason: {}\n').format(
                                              url, r.url, r.status_code,
                                              r.reason))
                return r, r.reason, r.url

    def response(self,
                 url,
                 headers=None,
                 timeout=None,
                 cookies=None,
                 c_id=None):
        headers = headers or self.headers
        timeout = timeout or self.timeout

        def flip_scheme():
            u = furl(url)
            u.scheme = 'https' if u.scheme == 'http' else 'http'
            return u.url

        f_val, err = self._get_response(url, headers, timeout, cookies)
        if err or f_val[1] or f_val[0] is None:
            flipped_url = flip_scheme()
            f_val, err = self._get_response(flipped_url, headers, timeout,
                                            cookies)
            if err:
                self._push_error(err, flipped_url, comp_id=c_id)
                return None
            else:
                if f_val[0] is None:
                    self._push_error('Response is NULL',
                                     flipped_url,
                                     comp_id=c_id)
                if f_val[1]:
                    self._push_error(f_val[1], flipped_url, comp_id=c_id)
                return f_val[0]
        else:
            return f_val[0]

    @error_trap
    def _check_valid_get(self, obj, a):
        obj_type = type(obj)
        if obj_type == requests_html.HTMLResponse:
            assert a in self.resp_attributes, \
                ('Second parameter must be one of: {}'.
                 format(', '.join(self.resp_attributes)))
        elif ((obj_type == requests_html.HTML)
              or (obj_type == requests_html.Element)):
            assert a in self.elem_attributes, \
                ('Second parameter must be one of: {}'.
                 format(', '.join(self.elem_attributes)))
        else:
            raise TypeError('First parameter must be one of type ' +
                            'requests_html.HTMLResponse, ' +
                            'requests_html.HTML, or ' +
                            'requests_html.Element')
        return

    @error_trap
    def _get(self, obj, a):
        _, err = self._check_valid_get(obj, a)
        if err:
            if type(err) == AssertionError:
                u = self.get(obj, 'url')
            else:
                u = None
            self._push_error(err, u, attr=a)
            return None
        else:
            attr = getattr(obj, a) if a != 'json' else getattr(obj, a)()
            if attr is None:
                u = self.get(obj, 'url') if a != 'url' else None
                self._push_error('NULL attribute', u, attr=a)
            return attr

    def get(self, obj, a):
        attr, err = self._get(obj, a)
        if err:
            u, e = self._get(obj, 'url') if a != 'url' else (None, None)
            if e:
                self._push_error(e, u, attr='url')
            self._push_error(err, u, attr=a)
        return attr

    @error_trap
    def _write_errors(self, outfile):
        ft = outfile.split('.')[-1]
        assert (ft in ['pkl', 'xlsx', 'csv']), \
            'Output filename must specify a pickle (.pkl), ' + \
            'excel (.xlsx) or csv (.csv) file.'
        if ft == 'pkl':
            pd.DataFrame(self._err_recs).to_pickle(outfile)
        elif ft == 'xlsx':
            pd.DataFrame(self._err_recs).to_excel(outfile,
                                                  engine='xlsxwriter',
                                                  index=False)
        else:
            pd.DataFrame(self._err_recs).to_csv(outfile, index=False)
        return outfile

    def write_errors(self, out_fn):
        outfile, err = self._write_errors(out_fn)
        if err:
            if self._logging:
                msg = '\nError while writing out error log: {}\n'.format(err)
                self._logger.error(msg)
        return outfile
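A usage sketch for the crawler; the target URL and output file are placeholders, and it assumes the class's module-level dependencies (HeaderGenerator, Logger, error_trap, furl, requests_html, pandas) are importable as in the original project:

# Usage sketch -- URL and output file name are placeholders
crawler = Crawler(timeout=15, log_file='crawl.log')
resp = crawler.response('https://example.com')
if resp is not None:
    print(crawler.get(resp, 'status_code'), crawler.get(resp, 'reason'))
crawler.write_errors('crawl_errors.csv')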
Code example #19
    def __init__(self, config_file=None, config_dict=None):

        Logger.__init__(self, 'ConfigParser', verbosity=0)
        self.config_file = config_file
        self.config_dict = config_dict
Code example #20
import re
import os
import pdb
import sys
import time
import boto3
import xlsxwriter
from argparse import ArgumentParser
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from utilities import Logger

logger = Logger()
s3 = boto3.client('s3')


def read_input(input_path, type):
    """ Read the input file """

    if type == 'csv':
        data = pd.read_csv(os.path.join(input_path)).fillna('')
        return data
    if type == 'xlsx':
        data = pd.ExcelFile(os.path.join(input_path))
        print(data.sheet_names[0])
        df = data.parse(str(data.sheet_names[0]))
        return df


def s3_upload(output, s3, bucket, folder):
Code example #21
    args = parser.parse_args()

    # allow gpu to be specified from command line for testing
    if args.gpu >= 0:
        chosen_device = 'cuda:%d' % (args.gpu)
        globals()['device'] = torch.device(chosen_device)
    print(device)
    args.device = device

    # create log folder and log file
    try:
        now = '%s' % datetime.datetime.now().time()
        now = now.replace(':', '_')
        args.save_dir = os.path.join(args.task, '%d' % args.num_nodes, now)
        os.makedirs(args.save_dir)
    except:
        pass
    out_file = open(os.path.join(args.save_dir, 'results.txt'), 'w+')
    printer = Logger(out_file, args.stdout_print)


    # print the run args
    for key, value in sorted((vars(args)).items()):
        printer.print_out("{}: {}".format(key, value))

    if args.task == 'tsp':
        train_tsp(args)
    elif args.task == 'vrp':
        train_vrp(args)
    else:
        raise ValueError('Task <%s> not understood'%args.task)
Code example #22
File: environment.py Project: edgarnav/selenium_test
from driver_interactions.ElementsInteractions import ElementsInteractions
from driver_interactions.WebDriver import WebDriver
import utilities.Logger as Logger
import config.ConfigFile as ConfigFile
import time

log = Logger.func_logger()


def before_all(context):
    log.info("Script started")
    context.prepare_driver = WebDriver()
    context.driver = context.prepare_driver.init_driver()
    context.bp = ElementsInteractions(context.driver)
    context.bp.launch_web_page(ConfigFile.url)


def after_all(context):
    time.sleep(2)
    context.driver.quit()
    log.info("Script ended")
Code example #23
from sklearn.preprocessing import OneHotEncoder
#from sklearn.externals import joblib
import joblib
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tensorflow.keras import preprocessing  # provides preprocessing.sequence.pad_sequences used below
import pandas as pd

from utilities import Logger
from datetime import datetime

TOKENIZER_PATH = 'model/tokenizer_vocab'
LABEL_ENCODER_PATH = 'model/label_encoder.pkl'
MODEL_PATH = 'model/model_weights.h5'
LOG_FILE_PATH = 'results/' + datetime.now().strftime('%H_%M_%d_%m_%Y.log')

logger = Logger(LOG_FILE_PATH)


def prepare_sequences(tokenizer, texts, options):
    """Tokenizes the textual input and prepares sequences by applying padding"""
    text = [tokenizer.encode(sample) for sample in texts]
    text = preprocessing.sequence.pad_sequences(text, maxlen=options.maxlen)
    return text


def train(options):
    """Loads data, trains the language detection model."""
    train_data = pd.read_csv(options.train, sep='\t')
    train_examples = train_data.text.values.astype(str)

    train_labels = train_data.language.values.astype(str)
Code example #24
File: db_writer.py Project: tgaudin/gryffin
    def __init__(self, config):
        self.config = config
        Logger.__init__(self, 'DB_Writer', self.config.get('verbosity'))
Code example #25
    def __init__(self, config):
        self.config = config
        Logger.__init__(self,
                        'SampleSelector',
                        verbosity=self.config.get('verbosity'))
        self.num_cpus = multiprocessing.cpu_count()
Code example #26
# see https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time?rq=1
# or https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py?rq=1
from typing import Union  # used in the NumericalAnalyzer type hints below

import pandas as pd  # used in the NumericalAnalyzer type hints below

try:
    from baseattrs import BasicDataAttributes, BaseStats
    from gis import GisAnalyzer, GisAnalyzerWithClusterLabel
    from date_and_time import DateByAnalyzer
    from utilities import Logger
    from dataframeselector import TypeSelector, NameSelector
except ImportError:
    from .baseattrs import BasicDataAttributes, BaseStats
    from .gis import GisAnalyzer, GisAnalyzerWithClusterLabel
    from .date_and_time import DateByAnalyzer
    from .utilities import Logger
    from .dataframeselector import TypeSelector, NameSelector

log = Logger.initialize_log()


class NumericalAnalyzer(BaseStats):
    """
    Provides basic visualization for numerical features
    """
    def __init__(self, data: Union[str, pd.DataFrame]):
        """
        Initializes a NumericalAnalyzer object. Takes data file name or a pandas DataFrame as input

        :param data: link to a file or a pandas DataFrame:
        :type data: Union[str, pd.DataFrame]
        """
        super().__init__(data)
Code example #27
class ElementsInteractions:

    log = Log.func_logger()

    def __init__(self, driver):
        self.driver = driver

    def locator(self, locator_type):
        if locator_type == "id":
            return By.ID
        elif locator_type == "name":
            return By.NAME
        elif locator_type == "class":
            return By.CLASS_NAME
        elif locator_type == "xpath":
            return By.XPATH
        elif locator_type == "css":
            return By.CSS_SELECTOR
        elif locator_type == "tag":
            return By.TAG_NAME
        elif locator_type == "link":
            return By.LINK_TEXT
        elif locator_type == "plink":
            return By.PARTIAL_LINK_TEXT
        else:
            self.log.error("Locator Type : " + locator_type +
                           " entered is not found")
        return False

    def launch_web_page(self, url):
        try:
            self.driver.get(url)
            self.log.info("Web Page Launched with URL : " + url)
        except Exception:
            self.log.info("Web Page not Launched with URL : " + url)

    def go_to_url(self, url):
        self.driver.get(url)

    def verify_page(self, page_name):
        if page_name != self.driver.title:
            self.take_screenshot(self.driver.title)
            assert False

    def back_page(self):
        self.driver.back()

    def explicit_wait(self, locator_value, locator_type, time):
        try:
            locator_by_type = self.locator(locator_type)
            WebDriverWait(self.driver, time).until(
                ec.presence_of_all_elements_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_element(self, locator_value, locator_type):
        element = None
        try:
            locator_by_type = self.locator(locator_type)
            element = self.driver.find_element(locator_by_type, locator_value)
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
        return element

    def get_all_elements(self, locator_value, locator_type):
        elements = None
        try:
            locator_by_type = self.locator(locator_type)
            elements = self.driver.find_elements(locator_by_type,
                                                 locator_value)
            self.log.info("Elements found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Elements not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
        return elements

    def wait_element(self, locator_value, locator_type):
        try:
            locator_by_type = self.locator(locator_type)
            wait = WebDriverWait(self.driver,
                                 25,
                                 poll_frequency=1,
                                 ignored_exceptions=[
                                     ElementNotVisibleException,
                                     NoSuchElementException
                                 ])
            element = wait.until(
                ec.presence_of_element_located(
                    (locator_by_type, locator_value)))
            self.log.info("WebElement found with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("WebElement not found with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False
        return element

    def click_element(self, locator_value, locator_type):
        try:
            element = self.wait_element(locator_value, locator_type)
            element.click()
            self.log.info("Clicked on WebElement with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Unable to Click on WebElement with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()
            assert False

    def send_text(self, text, locator_value, locator_type):
        try:
            element = self.wait_element(locator_value, locator_type)
            element.send_keys(text)
            self.log.info("Sent the text " + text +
                          " in WebElement with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to Sent the text " + text +
                           " in WebElement with locator value " +
                           locator_value + "using locatorType " + locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_text(self, locator_value, locator_type):
        element_text = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_text = element.text
            self.log.info("Got the text " + element_text +
                          " from WebElement with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to get the text " + element_text +
                           " from WebElement with locator value " +
                           locator_value + "using locatorType " + locator_type)
            print_stack()

        return element_text

    def is_element_displayed(self, locator_value, locator_type):
        element_displayed = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_displayed = element.is_displayed()
            self.log.info(
                "WebElement is Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "WebElement is not Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()

        return element_displayed

    def scroll(self, locator_value, locator_type):
        actions = ActionChains(self.driver)
        try:
            element = self.wait_element(locator_value, locator_type)
            actions.move_to_element(element).perform()
            self.log.info("Scrolled to WebElement with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Unable to scroll to WebElement with locator value " +
                locator_value + "using locatorType " + locator_type)
            print_stack()

    def take_screenshot(self, text):
        allure.attach(self.driver.get_screenshot_as_png(),
                      name=text,
                      attachment_type=AttachmentType.PNG)
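A short sketch tying the Selenium helper above to the page objects in code examples #10 and #15; the element class and URL come from ResultsPage, and the flow is illustrative only:

# Usage sketch -- combines WebDriver (code example #10) with the helper above
driver = WebDriver().init_driver()
page = ElementsInteractions(driver)
page.launch_web_page("https://www.tvmaze.com/shows/975/batman")
if page.is_element_displayed("btn-primary", "class"):
    page.click_element("btn-primary", "class")
driver.quit()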