Example #1
0
def generate_four_files(data_file, four_files):
    """Fill ``four_files`` in place with one file name per protein color.

    Entry ``k`` becomes ``data_file`` with every occurrence of ``BASE_COLOR``
    replaced by the k-th element of ``PROTEIN_COLORS``.

    :param data_file: file name in which ``BASE_COLOR`` is substituted.
    :param four_files: mutable sequence that is overwritten by index; it must
        have at least ``len(PROTEIN_COLORS)`` slots.
    :return: None, the result is written into ``four_files``.
    """
    logger.debug("Generating four files.")
    for slot, color in enumerate(PROTEIN_COLORS):
        four_files[slot] = data_file.replace(BASE_COLOR, color)
Example #2
0
def check_that_all_other_colors_exist(data_file):
    """Assert that the sibling file of every protein color exists on disk.

    For each color in ``PROTEIN_COLORS`` the candidate name is ``data_file``
    with ``BASE_COLOR`` replaced by that color.

    :param data_file: file name in which ``BASE_COLOR`` is substituted.
    :raises AssertionError: when a candidate is not an existing regular file.
    """
    # Lazily derive the candidate names; per the original author one of the
    # iterations is always redundant (the color that equals BASE_COLOR).
    candidates = (
        data_file.replace(BASE_COLOR, color) for color in PROTEIN_COLORS
    )

    for candidate in candidates:
        # The message is logged before the check, exactly as before.
        logger.debug("File exists: {0}".format(candidate))
        assert os.path.isfile(candidate), "File {0} does not exist".format(
            candidate)
Example #3
0
 def find_player_enemy(self):
     """Locate the first ``ENEMY`` cell of the terrain.

     ``self.terrain`` is scanned row by row; both indices run over
     ``1 .. self.size + 1`` inclusive.

     :return: ``[i, j]`` (row index, column index) of the first cell equal to
         ``ENEMY``, or ``None`` when no such cell is found.
     """
     for i in range(1, self.size + 2):
         for j in range(1, self.size + 2):
             if self.terrain[i][j] == ENEMY:
                 # The message used to be a copy of the one in
                 # find_player_position and reported the *player*.
                 log.debug(
                     f'find_player_enemy: enemy position is x = {j}, y = {i}'
                 )
                 return [i, j]
     # Make the "not found" outcome explicit instead of falling off the end.
     return None
Example #4
0
 def download_data(self):
     """Download the data of every state yielded by ``self.get_match()``.

     For each state a ``StateParser`` is built from the state together with
     this object's output path, timestamp and file format, and its own
     ``download_data`` is invoked.
     """
     for matched_state in self.get_match():
         logger.debug("Processing state %s", matched_state)
         state_parser = StateParser(
             matched_state,
             self.base_output_path,
             self.current_time,
             self.file_format,
         )
         state_parser.download_data()
Example #5
0
def main(options):
    """Create the output directory if needed and run the main parser.

    :param options: object exposing ``output`` (target directory) and
        ``format`` (passed on as ``file_format``).
    """
    output_dir = options.output
    logger.debug("Storing files into %s", output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    # Timestamp with the minutes fixed to "00".
    run_timestamp = time.strftime("%Y%m%d%H00")
    parser.MainParser(
        base_output_path=output_dir,
        current_time=run_timestamp,
        file_format=options.format,
    ).download_data()
Example #6
0
    def _internal_thread(self):
        """Repeatedly run ``self._func`` until the event fires.

        Before every run ``self._event.wait(self.sleep_time)`` is called; a
        truthy return value ends the loop. A truthy result of ``self._func``
        is forwarded to ``self._result_func`` when that callback is set. With
        ``self.single_shot`` enabled the loop ends after the first run.
        """
        while True:
            if self._event.wait(self.sleep_time):
                return

            outcome = self._func()
            if outcome and self._result_func:
                logger.debug('on_result call')
                self._result_func(outcome)

            if self.single_shot:
                return
Example #7
0
    def _get_station_filename(self, station):
        """ Build the download path for a station, creating its directory.

        The directory is ``<base_output_path>/<state>/<station>`` and the file
        inside it is named ``<current_time>.<file_format>``.

        :param station: station code.
        :return: full path of the file to download into.
        """
        station_dir = os.path.join(self.base_output_path, self.state, station)
        if not os.path.isdir(station_dir):
            logger.debug("Creating directory %s", station_dir)
            os.makedirs(station_dir)

        file_name = "%s.%s" % (self.current_time, self.file_format)
        return os.path.join(station_dir, file_name)
Example #8
0
def process_raw_data_row(raw_data_row):
    """
    Flatten one row of the original csv file into two parallel lists.

    Plain columns are copied as they are. The json columns ("device",
    "geoNetwork", "totals", "trafficSource") are decoded and each of their
    keys becomes a feature of its own; the nested "adwordsClickInfo" object of
    "trafficSource" is flattened the same way and appended last.

    :param raw_data_row: sequence indexed by the positions stored in
        gc.RAW_FEATURE_INDEX.
    :return: tuple (feature_names, feature_row); both lists have the same
        length and the same ordering.
    """
    json_columns = ("device", "geoNetwork", "totals", "trafficSource")

    def append_to_feature_row(feature_names, feature_row, sub_feature_dict):
        # Sorted so that the feature order is deterministic.
        for feature, value in sorted(sub_feature_dict.items()):
            logger.debug("{0} value: {1}".format(feature, value))
            feature_names.append(feature)
            feature_row.append(value)

    # Convert json string to python dict.
    device_dict = json.loads(raw_data_row[gc.RAW_FEATURE_INDEX["device"]])
    geo_network_dict = json.loads(
        raw_data_row[gc.RAW_FEATURE_INDEX["geoNetwork"]])
    totals_dict = json.loads(raw_data_row[gc.RAW_FEATURE_INDEX["totals"]])
    traffic_source_dict = json.loads(
        raw_data_row[gc.RAW_FEATURE_INDEX["trafficSource"]])

    # Detach the nested object in one step. A missing (or null) entry yields
    # an empty dict instead of the KeyError the former plain indexing raised.
    adwords_click_info_dict = traffic_source_dict.pop(
        "adwordsClickInfo", None) or {}

    feature_names = []
    feature_row = []

    logger.debug(" --- raw_data_row contents --- ")
    for feature, index in sorted(gc.RAW_FEATURE_INDEX.items()):
        logger.debug("value of feature {0}: {1}".format(
            feature, raw_data_row[index]))
        logger.debug(" --- --- --- ")

        # The json columns are expanded below, not copied verbatim.
        if feature not in json_columns:
            feature_names.append(feature)
            feature_row.append(raw_data_row[index])

    append_to_feature_row(feature_names, feature_row, device_dict)
    append_to_feature_row(feature_names, feature_row, geo_network_dict)
    append_to_feature_row(feature_names, feature_row, totals_dict)
    append_to_feature_row(feature_names, feature_row, traffic_source_dict)
    append_to_feature_row(feature_names, feature_row, adwords_click_info_dict)

    logger.debug("feature name: {0}".format(feature_names))
    logger.debug("feature row: {0}".format(feature_row))

    return feature_names, feature_row
Example #9
0
    def wrapper(*args, **kwargs):
        """Call ``func`` and, when ``debug_mode`` is on, log its positional
        arguments (paired with the parameter names) and its return value.
        """
        # Read the flag once so both log statements agree with each other.
        debugging = bool(debug_mode)

        if debugging and args:
            parameter_names = inspect.signature(func).parameters
            report = ["Arguments:"]
            report.extend(
                f"{name}: {value}"
                for name, value in zip(parameter_names, args)
            )
            log.debug('\n'.join(report))

        result = func(*args, **kwargs)

        if debugging:
            log.debug(f"{func.__name__} returns {result}")

        return result
Example #10
0
def read_npy_drawing_file_lists_and_return_data_array(
        x_npy_drawing_file_list, y_npy_drawing_labels_list, le,
        number_of_classes):
    """
    Load the given *.npy drawings into one array and one-hot encode the
    labels. Used mainly to prepare the data for the evaluate/predict methods
    of the model.

    :param x_npy_drawing_file_list: paths of the *.npy files; each path must
        match NPY_FILE_REXEXP.
    :param y_npy_drawing_labels_list: per-file label, used as the column index
        of the one-hot matrix.
    :param le: encoder whose ``transform`` maps a drawing name to its label.
    :param number_of_classes: number of columns of the one-hot matrix.
    :return: tuple (x_drawings, y_labels).
    """
    logger.info(
        "Reading data from *npy files and packing them into one big numpy array."
    )

    sample_count = len(x_npy_drawing_file_list)
    label_count = len(y_npy_drawing_labels_list)

    assert sample_count == label_count, "x and y dimensions do not match!"

    image_shape = (REDUCED_DATA_IMAGE_SIZE, REDUCED_DATA_IMAGE_SIZE,
                   NUMBER_IMAGE_OF_CHANNELS)
    x_drawings = np.zeros((sample_count,) + image_shape)
    y_labels = np.zeros((label_count, number_of_classes))

    for row, npy_path in enumerate(x_npy_drawing_file_list):
        image = np.load(npy_path).reshape(image_shape)

        # The drawing name is encoded in the file name.
        name_match = NPY_FILE_REXEXP.match(npy_path)
        assert name_match, "Regexp not matched!"
        drawing_name = name_match.group("drawing_name")

        label = le.transform([drawing_name])
        expected_label = y_npy_drawing_labels_list[row]
        logger.debug("label: {0}, expected label: {1}".format(
            label, expected_label))

        # Cross-check the label derived from the file name with the given one.
        assert label == expected_label, "Labels do not match!"

        x_drawings[row] = image
        y_labels[row, expected_label] = 1.0

    return x_drawings, y_labels
Example #11
0
def get_ajax_browser():
    """Create a Firefox instance through ``ajax_driver``.

    The Firefox binary at ``/usr/bin/firefox`` is used.

    :return: the object returned by ``ajax_driver.Firefox``.
    """
    ajax_browser = ajax_driver.Firefox(
        firefox_binary=FirefoxBinary('/usr/bin/firefox'))
    logger.debug('AjaxBrowser is used')
    return ajax_browser
Example #12
0
def get_browser():
    """Create a default Firefox instance through ``webdriver``.

    The Firefox binary at ``/usr/bin/firefox`` is used.

    :return: the object returned by ``webdriver.Firefox``.
    """
    firefox = webdriver.Firefox(
        firefox_binary=FirefoxBinary('/usr/bin/firefox'))
    logger.debug('default firefox is used')
    return firefox
Example #13
0
 def hdd(self):
     """Log total, used and free space of ``/`` and return the free space.

     All figures are floor-divided by 2**30, i.e. expressed in whole GiB.

     :return: free space of ``/`` in whole GiB.
     """
     gib = 2**30
     usage = shutil.disk_usage("/")
     free_gib = usage.free // gib

     logger.debug('Total HDD: {} GiB'.format(usage.total // gib))
     logger.debug("Used HDD: {} GiB".format(usage.used // gib))
     logger.debug('Free HDD: {} GiB'.format(free_gib))
     return free_gib
Example #14
0
 def sockets(self):
     """Log and return the number of entries of ``psutil.net_connections()``.

     :return: length of the connection list.
     """
     connection_count = len(psutil.net_connections())
     logger.debug('SOCKETS: ' + str(connection_count))
     return connection_count
Example #15
0
 def cpu(self):
     """Log and return the value of ``psutil.cpu_percent(interval=1)``.

     :return: the CPU percentage reported by psutil.
     """
     usage_percent = psutil.cpu_percent(interval=1)
     logger.debug('CPU: ' + str(usage_percent))
     return usage_percent
Example #16
0
 def ram(self):
     """Log and return the available memory as a percentage of the total.

     :return: ``available * 100 / total`` truncated to an int.
     """
     # Take a single snapshot so that "available" and "total" belong to the
     # same measurement and the system is queried only once.
     memory = psutil.virtual_memory()
     available_percent = int(memory.available * 100 / memory.total)
     logger.debug('RAM: ' + str(available_percent))
     return available_percent
Example #17
0
 def download_data(self):
     """Download the data file of every station from ``self.get_match()``.

     The URL is built from ``STATION_URL`` with the station, the state and
     the station again; the target path comes from
     ``self._get_station_filename``.
     """
     for station_code in self.get_match():
         logger.debug("Processing station %s", station_code)
         source_url = STATION_URL % (station_code, self.state, station_code)
         utils.download_content(
             source_url, self._get_station_filename(station_code))
Example #18
0
 def append_to_feature_row(feature_names, feature_row, sub_feature_dict):
     """Append the entries of ``sub_feature_dict`` to both lists, key-sorted.

     :param feature_names: list extended in place with the keys.
     :param feature_row: list extended in place with the matching values.
     :param sub_feature_dict: mapping of feature name to value.
     """
     # Keys are unique, so ordering by key equals ordering the items.
     for name in sorted(sub_feature_dict):
         entry = sub_feature_dict[name]
         logger.debug("{0} value: {1}".format(name, entry))
         feature_names.append(name)
         feature_row.append(entry)
Example #19
0
def process_raw_data(raw_data, features_to_take):
    """
    Takes the data from the csv file and converts them into
    a python list that contains only the requested features.

    Row 0 of raw_data is treated as the header and skipped. Every other row
    is flattened with process_raw_data_row and reduced to the columns named
    in features_to_take. Statistics about the encountered features are logged
    and printed on the way.

    :param raw_data: sequence of csv rows, header first.
    :param features_to_take: names of the features to keep, in output order.
    :return: tuple (feature_names_table, feature_values_table). The first is
        features_to_take itself; the second holds one list per data row,
        aligned with features_to_take, with the string "None" for every
        feature the row does not have.
    """

    n = len(raw_data)

    # Guarded so that empty input yields an empty table, not an IndexError.
    if n:
        logger.debug("raw_data features: {0}".format(raw_data[0]))

    feature_names_table = features_to_take
    feature_values_table = []

    # Different rows have different length. We store the distribution of the lengths.
    number_of_rows_dict = {}
    feature_names_dict = {}
    feature_set = set()
    for i in range(1, n):
        feature_names, feature_row = process_raw_data_row(raw_data[i])

        # Compare the two lists with each other; the former check compared
        # the length of feature_row with itself and could never fail.
        nf = len(feature_names)
        nr = len(feature_row)
        assert nf == nr, "ERROR: The number of feature names is not equal to the number of feature values!"

        number_of_rows_dict[nf] = number_of_rows_dict.get(nf, 0) + 1

        # For each nf we store the features in set in a dict entry feature_names_dict[nf]
        pass_feature_names_to_dict(feature_names, feature_names_dict)

        # We keep track of all the features in the data set in features_set.
        feature_set.update(feature_names)

        logger.info("we are at row {0}, number of features {1}".format(i, nf))

        # Rows have an uneven number of features, so we pick them by name.
        # The mapping is rebuilt for every row: a dict shared across rows
        # would let a missing feature inherit the value of an earlier row.
        feature_names_value_connection_dict = dict(
            zip(feature_names, feature_row))

        logger.debug(" --- --- --- Names <-> value connection --- --- --- ")
        logger.debug(feature_names_value_connection_dict)
        logger.debug(" --- --- --- -------------------------- --- --- --- ")

        # Take only the features that we want.
        reduced_feature_row = [
            feature_names_value_connection_dict.get(name, "None")
            for name in features_to_take
        ]

        feature_values_table.append(reduced_feature_row)

    # Print some statistics about the features that we have.
    print_dict(number_of_rows_dict)

    # Print feature_names_dict. Here we would like to also print the number of elements of
    # each value (Every value is a set of features).
    for key, val in sorted(feature_names_dict.items()):
        logger.info(" --- --- --- ")
        logger.info("key: {0} -- len(val): {1}".format(key, len(val)))

        for f in val:
            logger.info(f)

    # Print all possible features
    logger.info("number of all possible features: {0}".format(
        len(feature_set)))

    for position, feature in enumerate(sorted(feature_set)):
        print("\"{0}\": {1},".format(feature, position))

    return feature_names_table, feature_values_table
Example #20
0
def pack_images_into_npy_array(data_files,
                               ids_labels_dict=None,
                               number_of_images_to_pack=None):
    """
    Pack the color images of every sample into one array and save it.

    For each entry of data_files the files of all colors are checked and
    collected, merged with pack_images_into_one_npy_array and written three
    times ("npy", "npz" and "h5") under "../<data_type>_data/".

    :param data_files: sequence of image paths; each path has to match
        "../<data_type>/<id>_<color>.png".
    :param ids_labels_dict: maps an image id to its labels. When given, the
        data is treated as "train"; otherwise as "test" and the label is
        stored as the string "None".
    :param number_of_images_to_pack: optional cap on the number of packed
        images; must not exceed len(data_files).
    :raises AssertionError: when the cap is too large, a color file is
        missing or a path does not match the expected pattern.
    """

    if ids_labels_dict is not None:
        data_type = "train"
    else:
        data_type = "test"

    n = len(data_files)

    if number_of_images_to_pack is not None:
        # "<=": packing all n images is legitimate, only more than n is not.
        assert number_of_images_to_pack <= n, "Number of images {0} to pack cannot be greater than n = {1}".format(
            number_of_images_to_pack, n)
        n = number_of_images_to_pack

    # The pattern depends only on data_type, so compile it once.
    id_regexp = re.compile(
        r"../{0}/(?P<id>.*)_(blue|red|yellow|green).png".format(data_type))

    for i in range(n):
        if i % 10 == 0:
            logger.info("We are at: {0}/{1}".format(i, n))

        df = data_files[i]

        logger.debug("--- --- ---")
        check_that_all_other_colors_exist(df)

        four_files = ["", "", "", ""]
        generate_four_files(df, four_files)

        logger.debug("The four generated files:")
        for f in four_files:
            logger.debug(f)

        four_channel_img = pack_images_into_one_npy_array(four_files)
        logger.debug("Shape of four_channel_img: {0}".format(
            four_channel_img.shape))

        # Get id
        rm = id_regexp.match(four_files[0])

        assert rm, "Regexp not matched in function: pack_images_into_npy_array!"

        image_id = rm.group("id")

        if ids_labels_dict is not None:
            labels = ids_labels_dict[image_id]
            label_as_list = np.array(labels)
            # Labels are joined as they are; the former int conversion was
            # overwritten right away and never influenced the result.
            label_text = "_".join(map(str, labels))
        else:
            # We save in h5 and None is not recognised by it so we need a string.
            label_as_list = "None"
            label_text = "None"

        logger.debug("Label: {0}".format(label_text))
        out_file_name = "../{0}_data/img_{1}_s_{2}x{2}_label_{3}.".format(
            data_type, image_id, REDUCED_DATA_IMAGE_SIZE, label_text)

        logger.debug("out file name: {0}".format(out_file_name))

        np.save(out_file_name + "npy", four_channel_img)

        np.savez(out_file_name + "npz",
                 four_channel_img=four_channel_img,
                 label=label_as_list)

        save_h5_data(out_file_name + "h5",
                     data=four_channel_img,
                     label=label_as_list)
0
def convert_list_image_to_numpy_array(ndjson_drawing):
    """Rasterise a drawing given as a list of strokes into a 2-D numpy array.

    Element 0 of every stroke holds the first coordinates and element 1 the
    second coordinates of its points. Consecutive points are connected with
    ``get_line`` and every returned point is set to 1 on a zero-filled square
    array of side ``SIMPLIFIED_DATA_IMAGE_SIZE``.

    :param ndjson_drawing: list of strokes as described above.
    :return: the filled numpy array.
    """
    canvas = np.zeros(
        (SIMPLIFIED_DATA_IMAGE_SIZE, SIMPLIFIED_DATA_IMAGE_SIZE))

    logger.debug("---> ndjson drawing START <---")
    logger.debug(ndjson_drawing)
    logger.debug("---> ndjson drawing END <---")

    for stroke in ndjson_drawing:
        first_coords = stroke[0]
        for k in range(1, len(first_coords)):
            second_coords = stroke[1]
            logger.debug("drawing[i][0][k]: " + str(first_coords[k]))
            logger.debug("drawing[i][1][k]: " + str(second_coords[k]))

            # Segment from the previous point of the stroke to the current one.
            segment_start = (first_coords[k - 1], second_coords[k - 1])
            segment_end = (first_coords[k], second_coords[k])

            points = get_line(segment_start, segment_end)
            logger.debug("points: " + str(points))

            for point in points:
                canvas[point[0], point[1]] = 1

    return canvas
Example #22
0
def get_profile_browser(
        profile_path='/home/oleh/.mozilla/firefox/80p26oye.default'):
    """Create a Firefox instance that uses an existing Firefox profile.

    :param profile_path: directory of the Firefox profile to load. The
        default is the previously hard-coded, machine-specific path, so
        existing callers keep their behavior; pass another path to run on a
        different machine or account.
    :return: the object returned by ``webdriver.Firefox``.
    """
    ffprofile = webdriver.FirefoxProfile(profile_path)
    browser = webdriver.Firefox(ffprofile)
    logger.debug('firefox profile is used')
    return browser
Example #23
0
def split_the_numpy_drawings_into_test_train_evaluate_datasets(
        reduced_set=None, test_size=0.05):
    """Shuffle the numpy drawings, encode their labels and split them.

    :param reduced_set: forwarded to ``get_numpy_drawings_list``.
    :param test_size: forwarded to ``train_test_split``.
    :return: tuple ``(x_train, y_train, x_test, y_test, le)`` where ``le`` is
        the ``LabelEncoder`` fitted on ``QUICK_DRAW_LABELS``.
    """
    drawing_files = get_numpy_drawings_list(reduced_set=reduced_set)
    logger.debug("numpy_drawings_list length: {0}".format(
        len(drawing_files)))
    logger.debug("Before shuffle")
    logger.debug(drawing_files)

    # In-place shuffle.
    random.shuffle(drawing_files)
    logger.debug("After shuffle")
    logger.debug(drawing_files)

    text_labels = get_labels(drawing_files)
    for position, text_label in enumerate(text_labels):
        logger.debug("{0}  -  {1}".format(drawing_files[position],
                                          text_label))

    # The encoder is fitted on the full label vocabulary.
    le = LabelEncoder()
    le.fit_transform(QUICK_DRAW_LABELS)

    logger.info("Checking the labels mapping")
    logger.info(le.transform(["axe", "bat", "baseball_bat"]))

    encoded_labels = le.transform(text_labels)
    logger.debug(encoded_labels)

    # train_test_split returns (x_train, x_test, y_train, y_test).
    split = train_test_split(drawing_files,
                             encoded_labels,
                             test_size=test_size,
                             random_state=MAIN_SEED)
    x_train, x_test, y_train, y_test = split

    logger.info(" --- Size of split data --- ")
    logger.info("x_train length: {0}".format(len(x_train)))
    logger.info("y_train length: {0}".format(len(y_train)))

    logger.info("x_test length: {0}".format(len(x_test)))
    logger.info("y_test length: {0}".format(len(y_test)))
    logger.info(" ---                    --- ")

    return x_train, y_train, x_test, y_test, le