def generate_four_files(data_file, four_files):
    """
    Fill ``four_files`` in place with one file name per protein color.

    :param data_file: file name in which ``BASE_COLOR`` is replaced by each
        entry of ``PROTEIN_COLORS``.
    :param four_files: mutable sequence; slot ``i`` receives the name built
        from ``PROTEIN_COLORS[i]``.
    """
    logger.debug("Generating four files.")
    for slot, color in enumerate(PROTEIN_COLORS):
        four_files[slot] = data_file.replace(BASE_COLOR, color)
def check_that_all_other_colors_exist(data_file):
    """
    Assert that the file derived for every protein color is present on disk.

    :param data_file: file name in which ``BASE_COLOR`` is replaced by each
        entry of ``PROTEIN_COLORS`` in turn.
    :raises AssertionError: if a derived path is not an existing file.
    """
    # This loop is always redundant over one of the colors.
    for color in PROTEIN_COLORS:
        candidate = data_file.replace(BASE_COLOR, color)
        # Logged before the check, so the message does not confirm existence.
        logger.debug("File exists: {0}".format(candidate))
        assert os.path.isfile(candidate), "File {0} does not exist".format(
            candidate)
def find_player_enemy(self):
    """
    Locate the first terrain cell that holds ``ENEMY``.

    The scan covers rows and columns ``1 .. self.size + 1`` of
    ``self.terrain`` in row-major order.

    :return: ``[row, column]`` of the first matching cell, or ``None`` when
        no cell in the scanned range equals ``ENEMY``.
    """
    for i in range(1, self.size + 2):
        for j in range(1, self.size + 2):
            if self.terrain[i][j] == ENEMY:
                # The message used to name "find_player_position" and call
                # the hit a player position; it now describes what was found.
                log.debug(
                    f'find_player_enemy: enemy position is x = {j}, y = {i}'
                )
                return [i, j]
    # Explicit not-found result (previously an implicit None).
    return None
def download_data(self):
    """Run a ``StateParser`` download for every state from ``get_match()``."""
    for state in self.get_match():
        logger.debug("Processing state %s", state)
        state_parser = StateParser(
            state,
            self.base_output_path,
            self.current_time,
            self.file_format,
        )
        state_parser.download_data()
def main(options):
    """
    Create the output directory if needed and run the scraper download.

    :param options: object exposing ``output`` (target directory) and
        ``format`` (passed to the parser as ``file_format``).
    """
    logger.debug("Storing files into %s", options.output)
    if not os.path.isdir(options.output):
        # makedirs also creates missing parent directories; os.mkdir raised
        # when ``options.output`` was a nested path that did not exist yet.
        os.makedirs(options.output)

    aemet_scraper = parser.MainParser(
        base_output_path=options.output,
        # Timestamp with the minutes fixed to "00".
        current_time=time.strftime("%Y%m%d%H00"),
        file_format=options.format
    )
    aemet_scraper.download_data()
# Polling loop: repeats for as long as ``self._event.wait(self.sleep_time)``
# returns a falsy value, calling ``self._func()`` on every pass.
# When the result is truthy and ``self._result_func`` is set, the result is
# handed to ``self._result_func`` (after a debug log line).
# The loop is left early when ``self.single_shot`` is truthy.
# NOTE(review): this source is flattened onto one line, so it cannot be told
# whether the ``single_shot`` check belongs inside ``if result:`` or runs on
# every pass -- confirm against the original layout before reformatting.
# NOTE(review): ``self._event`` is presumably a ``threading.Event`` -- confirm.
def _internal_thread(self): while not self._event.wait(self.sleep_time): result = self._func() if result: if self._result_func: logger.debug('on_result call') self._result_func(result) if self.single_shot: break
def _get_station_filename(self, station):
    """
    Build the download path for a station, creating its directory on demand.

    :param station: station code.
    :return: ``<base_output_path>/<state>/<station>/<current_time>.<file_format>``.
    """
    station_dir = os.path.join(self.base_output_path, self.state, station)
    directory_missing = not os.path.isdir(station_dir)
    if directory_missing:
        logger.debug("Creating directory %s", station_dir)
        os.makedirs(station_dir)

    basename = "%s.%s" % (self.current_time, self.file_format)
    return os.path.join(station_dir, basename)
def process_raw_data_row(raw_data_row):
    """
    Flatten one csv row, including its JSON-encoded columns, into two lists.

    Plain columns are copied as they are. The ``device``, ``geoNetwork``,
    ``totals`` and ``trafficSource`` columns are decoded from JSON and their
    entries appended in sorted key order, followed by the entries of the
    ``adwordsClickInfo`` object nested in ``trafficSource``.

    :param raw_data_row: row indexed through ``gc.RAW_FEATURE_INDEX``.
    :return: ``(feature_names, feature_row)`` -- parallel lists of names and
        values.
    """
    def append_to_feature_row(feature_names, feature_row, sub_feature_dict):
        for sub_name in sorted(sub_feature_dict):
            sub_value = sub_feature_dict[sub_name]
            logger.debug("{0} value: {1}".format(sub_name, sub_value))
            feature_names.append(sub_name)
            feature_row.append(sub_value)

    # Columns holding a JSON string; decoded in this order.
    json_columns = ("device", "geoNetwork", "totals", "trafficSource")
    device_dict, geo_network_dict, totals_dict, traffic_source_dict = (
        json.loads(raw_data_row[gc.RAW_FEATURE_INDEX[column]])
        for column in json_columns
    )
    # Detach the nested object so it is flattened on its own below.
    adwords_click_info_dict = traffic_source_dict.pop("adwordsClickInfo")

    feature_names = []
    feature_row = []

    logger.debug(" --- raw_data_row contents --- ")
    for feature, index in sorted(gc.RAW_FEATURE_INDEX.items()):
        raw_value = raw_data_row[index]
        logger.debug("value of feature {0}: {1}".format(feature, raw_value))
        logger.debug(" --- --- --- ")
        if feature in json_columns:
            continue
        feature_names.append(feature)
        feature_row.append(raw_value)

    for sub_dict in (device_dict, geo_network_dict, totals_dict,
                     traffic_source_dict, adwords_click_info_dict):
        append_to_feature_row(feature_names, feature_row, sub_dict)

    logger.debug("feature name: {0}".format(feature_names))
    logger.debug("feature row: {0}".format(feature_row))

    return feature_names, feature_row
def wrapper(*args, **kwargs):
    """
    Call ``func`` and, when ``debug_mode`` is on, log its inputs and result.

    Only positional arguments are logged; they are paired with the parameter
    names of ``func`` in declaration order.

    :return: whatever ``func`` returns.
    """
    if not debug_mode:
        return func(*args, **kwargs)

    if args:
        parameter_names = inspect.signature(func).parameters
        report = ["Arguments:"]
        for name, value in dict(zip(parameter_names, args)).items():
            report.append(f"{name}: {value}")
        log.debug('\n'.join(report))

    result = func(*args, **kwargs)
    log.debug(f"{func.__name__} returns {result}")
    return result
def read_npy_drawing_file_lists_and_return_data_array(
        x_npy_drawing_file_list, y_npy_drawing_labels_list, le,
        number_of_classes):
    """
    Load the listed ``*.npy`` drawings into one array with one-hot labels.

    Used mainly to prepare the data for the evaluate/predict methods of the
    model.

    :param x_npy_drawing_file_list: paths of the ``*.npy`` drawing files.
    :param y_npy_drawing_labels_list: encoded label for each file.
    :param le: encoder whose ``transform`` maps a drawing name to its label.
    :param number_of_classes: width of the one-hot label rows.
    :return: ``(x_drawings, y_labels)``.
    :raises AssertionError: if the lists differ in length, a file name does
        not match ``NPY_FILE_REXEXP``, or the label derived from the file
        name differs from the supplied one.
    """
    logger.info(
        "Reading data from *npy files and packing them into one big numpy array."
    )

    n_x = len(x_npy_drawing_file_list)
    n_y = len(y_npy_drawing_labels_list)
    assert n_x == n_y, "x and y dimensions do not match!"

    image_shape = (REDUCED_DATA_IMAGE_SIZE, REDUCED_DATA_IMAGE_SIZE,
                   NUMBER_IMAGE_OF_CHANNELS)
    x_drawings = np.zeros((n_x,) + image_shape)
    y_labels = np.zeros((n_y, number_of_classes))

    for row, npy_path in enumerate(x_npy_drawing_file_list):
        image = np.load(npy_path).reshape(image_shape)

        name_match = NPY_FILE_REXEXP.match(npy_path)
        assert name_match, "Regexp not matched!"
        drawing_name = name_match.group("drawing_name")

        # Cross-check the label encoded in the file name with the given one.
        label = le.transform([drawing_name])
        expected_label = y_npy_drawing_labels_list[row]
        logger.debug("label: {0}, expected label: {1}".format(
            label, expected_label))
        assert label == expected_label, "Labels do not match!"

        x_drawings[row, :, :, :] = image
        y_labels[row, expected_label] = 1.0

    return x_drawings, y_labels
def get_ajax_browser():
    """Return an ``ajax_driver.Firefox`` using the binary at /usr/bin/firefox."""
    ajax_browser = ajax_driver.Firefox(
        firefox_binary=FirefoxBinary('/usr/bin/firefox'))
    logger.debug('AjaxBrowser is used')
    return ajax_browser
def get_browser():
    """Return a ``webdriver.Firefox`` using the binary at /usr/bin/firefox."""
    default_browser = webdriver.Firefox(
        firefox_binary=FirefoxBinary('/usr/bin/firefox'))
    logger.debug('default firefox is used')
    return default_browser
def hdd(self):
    """
    Log the total, used and free space of "/" and return the free space.

    :return: free space on "/" in whole GiB (floor division by 2**30).
    """
    bytes_per_gib = 2**30
    usage = shutil.disk_usage("/")

    logger.debug('Total HDD: {} GiB'.format(usage.total // bytes_per_gib))
    logger.debug("Used HDD: {} GiB".format(usage.used // bytes_per_gib))
    free_gib = usage.free // bytes_per_gib
    logger.debug('Free HDD: {} GiB'.format(free_gib))
    return free_gib
def sockets(self):
    """Log and return the number of entries from ``psutil.net_connections()``."""
    connection_count = len(psutil.net_connections())
    logger.debug('SOCKETS: ' + str(connection_count))
    return connection_count
def cpu(self):
    """Log and return ``psutil.cpu_percent`` measured over a 1 second interval."""
    usage_percent = psutil.cpu_percent(interval=1)
    logger.debug('CPU: ' + str(usage_percent))
    return usage_percent
def ram(self):
    """
    Log and return the share of memory that is currently available.

    :return: ``available * 100 / total`` from ``psutil.virtual_memory()``,
        truncated to an ``int``.
    """
    # Read one snapshot so ``available`` and ``total`` belong to the same
    # measurement; the call used to be made twice.
    memory = psutil.virtual_memory()
    available_percent = int(memory.available * 100 / memory.total)
    logger.debug('RAM: ' + str(available_percent))
    return available_percent
def download_data(self):
    """Download the content of every station returned by ``get_match()``."""
    for station in self.get_match():
        logger.debug("Processing station %s", station)
        # The URL template takes the station code twice, around the state.
        utils.download_content(
            STATION_URL % (station, self.state, station),
            self._get_station_filename(station),
        )
def append_to_feature_row(feature_names, feature_row, sub_feature_dict):
    """
    Append the entries of ``sub_feature_dict`` to both lists, sorted by key.

    :param feature_names: list extended in place with the keys.
    :param feature_row: list extended in place with the matching values.
    :param sub_feature_dict: mapping of feature name to value.
    """
    for name in sorted(sub_feature_dict):
        entry = sub_feature_dict[name]
        logger.debug("{0} value: {1}".format(name, entry))
        feature_names.append(name)
        feature_row.append(entry)
def process_raw_data(raw_data, features_to_take):
    """
    Convert the csv rows into a table holding only the requested features.

    ``raw_data[0]`` is logged as the feature header and skipped. Every other
    row is unpacked with ``process_raw_data_row`` and reduced to the columns
    named in ``features_to_take``. Statistics about the row lengths and the
    feature names seen are logged and printed along the way.

    :param raw_data: sequence of csv rows; index 0 is not processed as data.
    :param features_to_take: feature names to keep, in output column order.
    :return: ``(feature_names_table, feature_values_table)``; the first is
        ``features_to_take`` itself, the second holds one list per data row
        with the string ``"None"`` for features the row does not have.
    :raises AssertionError: if a row yields a different number of feature
        names than feature values.
    """
    logger.debug("raw_data features: {0}".format(raw_data[0]))

    feature_names_table = features_to_take
    feature_values_table = []

    # Different rows have different length. We store the distribution of the
    # lengths.
    number_of_rows_dict = {}
    feature_names_dict = {}
    feature_set = set()

    for i in range(1, len(raw_data)):
        feature_names, feature_row = process_raw_data_row(raw_data[i])
        nf = len(feature_row)
        # Compare names against values; the check used to compare
        # len(feature_row) with itself and could never fail.
        assert len(feature_names) == nf, "ERROR: The number of feature names is not equal to the number of feature values!"

        number_of_rows_dict[nf] = number_of_rows_dict.get(nf, 0) + 1

        # For each nf we store the features in a set in feature_names_dict[nf].
        pass_feature_names_to_dict(feature_names, feature_names_dict)

        # We keep track of all the features in the data set in feature_set.
        feature_set.update(feature_names)

        logger.info("we are at row {0}, number of features {1}".format(i, nf))

        # Rows have an uneven number of features, so the wanted ones are
        # picked by name. The mapping is rebuilt for every row: sharing one
        # dict across rows let a row that lacks a feature inherit the value
        # of an earlier row instead of getting "None".
        feature_names_value_connection_dict = dict(
            zip(feature_names, feature_row))

        logger.debug(" --- --- --- Names <-> value connection --- --- --- ")
        logger.debug(feature_names_value_connection_dict)
        logger.debug(" --- --- --- -------------------------- --- --- --- ")

        # Take only the features that we want.
        reduced_feature_row = [
            feature_names_value_connection_dict.get(wanted, "None")
            for wanted in features_to_take
        ]
        feature_values_table.append(reduced_feature_row)

    # Print some statistics about the features that we have.
    print_dict(number_of_rows_dict)

    # Print feature_names_dict together with the number of elements of each
    # value (every value is a set of features).
    for key, val in sorted(feature_names_dict.items()):
        logger.info(" --- --- --- ")
        logger.info("key: {0} -- len(val): {1}".format(key, len(val)))
        for f in val:
            logger.info(f)

    # Print all possible features
    logger.info("number of all possible features: {0}".format(
        len(feature_set)))

    for position, feature_name in enumerate(sorted(feature_set)):
        print("\"{0}\": {1},".format(feature_name, position))

    return feature_names_table, feature_values_table
def pack_images_into_npy_array(data_files,
                               ids_labels_dict=None,
                               number_of_images_to_pack=None):
    """
    Pack the four color files of each image into one array and save it.

    For every entry of ``data_files`` the four per-color file names are
    generated, packed with ``pack_images_into_one_npy_array`` and written as
    ``.npy``, ``.npz`` and ``.h5`` under ``../<data_type>_data/``.

    :param data_files: file names, one per image.
    :param ids_labels_dict: mapping of image id to its labels. When given the
        data is handled as "train", otherwise as "test" and the label is the
        string ``"None"``.
    :param number_of_images_to_pack: optional limit; only the first this
        many entries of ``data_files`` are processed.
    :raises AssertionError: if the limit exceeds ``len(data_files)``, a color
        file is missing, or a file name does not match the expected pattern.
    """
    data_type = "train" if ids_labels_dict is not None else "test"

    n = len(data_files)
    if number_of_images_to_pack is not None:
        # Packing exactly n images is valid; the check used ``<`` and
        # rejected it although the message only forbids "greater".
        assert number_of_images_to_pack <= n, "Number of images {0} to pack cannot be greater then n = {1}".format(
            number_of_images_to_pack, n)
        n = number_of_images_to_pack

    # The pattern depends only on data_type, so it is compiled once here
    # rather than on every iteration.
    # NOTE(review): the dots in the pattern are unescaped and match any
    # character -- confirm whether that is intended.
    id_pattern = re.compile(
        r"../{0}/(?P<id>.*)_(blue|red|yellow|green).png".format(data_type))

    for i in range(n):
        if i % 10 == 0:
            logger.info("We are at: {0}/{1}".format(i, n))

        df = data_files[i]
        logger.debug("--- --- ---")
        check_that_all_other_colors_exist(df)

        four_files = ["", "", "", ""]
        generate_four_files(df, four_files)
        logger.debug("The four generated files:")
        for f in four_files:
            logger.debug(f)

        four_channel_img = pack_images_into_one_npy_array(four_files)
        logger.debug("Shape of four_channel_img: {0}".format(
            four_channel_img.shape))

        # Get id
        rm = id_pattern.match(four_files[0])
        assert rm, "Regexp not matched in function: pack_images_into_npy_array!"
        image_id = rm.group("id")

        if ids_labels_dict is not None:
            raw_labels = ids_labels_dict[image_id]
            label_as_list = np.array(raw_labels)
            # The int conversion result was always discarded; it is kept only
            # because it raises for labels that are not int-convertible.
            # NOTE(review): possibly str(int(label)) was intended -- confirm.
            for raw_label in raw_labels:
                int(raw_label)
            label_text = "_".join(map(str, raw_labels))
        else:
            # We save in h5 and None is not recognised by it so we need a
            # string.
            label_as_list = "None"
            label_text = "None"
        logger.debug("Label: {0}".format(label_text))

        out_file_name = "../{0}_data/img_{1}_s_{2}x{2}_label_{3}.".format(
            data_type, image_id, REDUCED_DATA_IMAGE_SIZE, label_text)
        logger.debug("out file name: {0}".format(out_file_name))

        np.save(out_file_name + "npy", four_channel_img)
        np.savez(out_file_name + "npz",
                 four_channel_img=four_channel_img,
                 label=label_as_list)
        save_h5_data(out_file_name + "h5",
                     data=four_channel_img,
                     label=label_as_list)
def convert_list_image_to_numpy_array(ndjson_drawing):
    """
    Rasterise a stroke-based drawing into a square array of zeros and ones.

    Consecutive points of every stroke are joined with ``get_line`` and each
    returned point is set to 1 on a
    ``SIMPLIFIED_DATA_IMAGE_SIZE`` x ``SIMPLIFIED_DATA_IMAGE_SIZE`` array.

    :param ndjson_drawing: sequence of strokes; ``stroke[0]`` and
        ``stroke[1]`` are parallel coordinate sequences (presumably x and y
        -- confirm against the data format).
    :return: the filled numpy array.
    """
    canvas = np.zeros(
        (SIMPLIFIED_DATA_IMAGE_SIZE, SIMPLIFIED_DATA_IMAGE_SIZE))

    logger.debug("---> ndjson drawing START <---")
    logger.debug(ndjson_drawing)
    logger.debug("---> ndjson drawing END <---")

    for stroke in ndjson_drawing:
        # Start at 1: every segment joins point k - 1 with point k.
        for k in range(1, len(stroke[0])):
            logger.debug("drawing[i][0][k]: " + str(stroke[0][k]))
            logger.debug("drawing[i][1][k]: " + str(stroke[1][k]))

            segment_start = (stroke[0][k - 1], stroke[1][k - 1])
            segment_end = (stroke[0][k], stroke[1][k])
            points = get_line(segment_start, segment_end)
            logger.debug("points: " + str(points))

            for point in points:
                canvas[point[0]][point[1]] = 1

    return canvas
def get_profile_browser(
        profile_path='/home/oleh/.mozilla/firefox/80p26oye.default'):
    """
    Return a ``webdriver.Firefox`` started with an existing Firefox profile.

    :param profile_path: directory of the Firefox profile to load. The
        default is the path that used to be hard-coded, so existing callers
        behave as before while other machines can pass their own profile.
    :return: the browser instance.
    """
    ffprofile = webdriver.FirefoxProfile(profile_path)
    browser = webdriver.Firefox(ffprofile)
    logger.debug('firefox profile is used')
    return browser
def split_the_numpy_drawings_into_test_train_evaluate_datasets(
        reduced_set=None, test_size=0.05):
    """
    Shuffle the numpy drawings, encode their labels and split them.

    :param reduced_set: forwarded to ``get_numpy_drawings_list``.
    :param test_size: forwarded to ``train_test_split``.
    :return: ``(x_train, y_train, x_test, y_test, le)`` where ``le`` is the
        ``LabelEncoder`` fitted on ``QUICK_DRAW_LABELS``.
    """
    numpy_drawings_list = get_numpy_drawings_list(reduced_set=reduced_set)
    logger.debug("numpy_drawings_list length: {0}".format(
        len(numpy_drawings_list)))

    logger.debug("Before shuffle")
    logger.debug(numpy_drawings_list)
    random.shuffle(numpy_drawings_list)
    logger.debug("After shuffle")
    logger.debug(numpy_drawings_list)

    labels = get_labels(numpy_drawings_list)
    for position, label in enumerate(labels):
        logger.debug("{0} - {1}".format(numpy_drawings_list[position], label))

    # Fit on the full label vocabulary; the transformed output is not needed.
    le = LabelEncoder()
    le.fit_transform(QUICK_DRAW_LABELS)

    logger.info("Checking the labels mapping")
    logger.info(le.transform(["axe", "bat", "baseball_bat"]))

    labels = le.transform(labels)
    logger.debug(labels)

    # train_test_split yields x_train, x_test, y_train, y_test in that order.
    split = train_test_split(numpy_drawings_list,
                             labels,
                             test_size=test_size,
                             random_state=MAIN_SEED)
    x_train, x_test, y_train, y_test = split

    logger.info(" --- Size of split data --- ")
    logger.info("x_train length: {0}".format(len(x_train)))
    logger.info("y_train length: {0}".format(len(y_train)))
    logger.info("x_test length: {0}".format(len(x_test)))
    logger.info("y_test length: {0}".format(len(y_test)))
    logger.info(" --- --- ")

    return x_train, y_train, x_test, y_test, le