def keep_latest_dict(self, load_event_dicts):
    '''
    Reduce a list of table-refresh event dicts to one dict
    per table: the one with the greatest 'time_refreshed'.

    Each dict is identified by its 'tbl_name' entry. The result
    lists tables in the order in which they first appear in
    the input. When two events for the same table carry equal
    'time_refreshed' values, the earlier one is kept.

    @param load_event_dicts: array of dict describing table
        refresh events.
    @type load_event_dicts: [{}]
    @return: the most recent refresh dict of each table
    @rtype: [{}]
    '''
    # {tbl_name : load_event_dict}; ordered so that the
    # caller's table order is retained:
    newest_by_table = OrderedDict()

    for event in load_event_dicts:
        table = event['tbl_name']
        try:
            is_newer = (event['time_refreshed'] >
                        newest_by_table[table]['time_refreshed'])
        except KeyError:
            # No comparison possible; typically the first
            # event seen for this table:
            is_newer = True
        if is_newer:
            newest_by_table[table] = event

    return list(newest_by_table.values())
class Nodes(object):
    """
    Container holding three ordered maps of nodes: input,
    intermediate and output.

    Membership tests and item access go by the maps' keys and
    consult the intermediate map first, then input, then output.
    Iteration yields the maps' values: input first, then
    intermediate, then output.
    """

    def __init__(self):
        self.input_nodes = OrderedDict()
        self.intermediate_nodes = OrderedDict()
        self.output_nodes = OrderedDict()

    def _maps_in_lookup_order(self):
        """Return the three maps in the order used for key lookups."""
        return (self.intermediate_nodes, self.input_nodes, self.output_nodes)

    def __contains__(self, item: nodes.Node):
        """Return True if `item` is a key of any of the three maps."""
        return any(item in node_map for node_map in self._maps_in_lookup_order())

    def __getitem__(self, item: nodes.Node):
        """
        Return the value stored under `item` in the first map that has it.

        :raises KeyError: if no map contains `item`; the exception
            carries the missing key.
        """
        for node_map in self._maps_in_lookup_order():
            if item in node_map:
                return node_map[item]
        # Report which key was missing instead of an empty KeyError.
        raise KeyError(item)

    def __iter__(self):
        """Yield all stored values: input, intermediate, then output."""
        yield from self.input_nodes.values()
        yield from self.intermediate_nodes.values()
        yield from self.output_nodes.values()

    def __str__(self):
        """Return every node's string form, each preceded by a newline."""
        return "".join(f"\n{node}" for node in self)
class TwoLayerNet:
    """
    Network made of the layers 'Affine1', 'Relu1' and 'Affine2',
    followed by a SoftmaxWithLoss last layer.

    Weights and biases live in `self.params` under the keys
    'W1', 'b1', 'W2' and 'b2'; the Affine layers are constructed
    from those same arrays.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        # Weights: np.random.rand samples scaled by weight_init_std
        # (W1 is drawn before W2). Biases: zeros.
        w1 = weight_init_std * np.random.rand(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.rand(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layers are applied in insertion order by predict().
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed `x` through every layer's forward() in order; return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return lastLayer.forward(predict(x), t)."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """
        Return the fraction of rows of `x` whose argmax prediction
        equals the label. `t` may be a 1-d array of labels, or an
        array with more dimensions that is reduced with argmax
        along axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """
        Return {'W1', 'b1', 'W2', 'b2'} gradients as computed by the
        module-level numerical_gradient() helper on each parameter.
        """
        def loss_W(W):
            # The argument is ignored; the loss is evaluated
            # with the network's current parameters.
            return self.loss(x, t)

        grads = {}
        for name in ('W1', 'b1', 'W2', 'b2'):
            grads[name] = numerical_gradient(loss_W, self.params[name])
        return grads

    def gradient(self, x, t):
        """
        Run a forward pass, then call backward() on the last layer
        and on all layers in reverse order. Return the dW/db
        attributes of both Affine layers keyed 'W1', 'b1', 'W2', 'b2'.
        """
        # Forward pass; the return value is not needed here.
        self.loss(x, t)

        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        affine1 = self.layers['Affine1']
        affine2 = self.layers['Affine2']
        return {
            'W1': affine1.dW,
            'b1': affine1.db,
            'W2': affine2.dW,
            'b2': affine2.db,
        }
class Transect(sm.CustomObject):
    """
    A line between a start and an end coordinate that holds
    `Loc` objects keyed, and kept sorted, by their distance `x`.
    """
    base_type = "transect"
    type = "transect"
    datum = "top of face"

    def __init__(self, **kwargs):
        """
        Recognised keyword arguments: name, start, end (each a
        (lat, long) pair, default (0, 0)), h_face, av_ground_slope.
        """
        super(Transect, self).__init__()
        self._locs = OrderedDict()
        self.name = kwargs.get("name", None)
        # coords (lat, long)
        start_pair = kwargs.get("start", (0, 0))
        end_pair = kwargs.get("end", (0, 0))
        self.s_coords = Coords(lat=start_pair[0], lon=start_pair[1])
        self.e_coords = Coords(lat=end_pair[0], lon=end_pair[1])
        self.ug_values = []
        self.ug_xs = []
        self.h_face = kwargs.get("h_face", None)
        self.av_ground_slope = kwargs.get("av_ground_slope", None)
        self._extra_class_inputs = [
            "locs",
            "start",
            "end",
            "ug_values",
            "ug_xs",
            "h_face",
            "av_ground_slope",
            "datum",
        ]
        self.inputs = self.inputs + self._extra_class_inputs

    # ------------------------------------------------------------
    # Coordinates
    # ------------------------------------------------------------

    @property
    def start(self):
        return self.s_coords.as_tuple

    @start.setter
    def start(self, values):
        self.s_coords = Coords(lat=values[0], lon=values[1])

    @property
    def end(self):
        return self.e_coords.as_tuple

    @end.setter
    def end(self, values):
        self.e_coords = Coords(lat=values[0], lon=values[1])

    @property
    def tran_line(self):
        """
        Line from the start to the end coordinates, or None (after
        emitting two warnings) when the spatial module cannot be
        imported.
        """
        try:
            from liquepy.spatial.map_coords import Line
            return Line(self.s_coords, self.e_coords)
        except ImportError as e:
            warnings.warn('Need to import spatial packages', stacklevel=3)
            warnings.warn(e, stacklevel=3)
            return None

    @property
    def x_end(self):
        return self.tran_line.dist

    # ------------------------------------------------------------
    # Locs: storage
    # ------------------------------------------------------------

    @property
    def locs(self):
        return self._locs

    @locs.setter
    def locs(self, locs):
        """
        Add one new Loc per entry of `locs`, keyed by the entry's
        "x" value and populated through sm.add_to_obj.
        """
        for loc_id in locs:
            loc_data = locs[loc_id]
            new_loc = Loc()
            self._locs[loc_data["x"]] = new_loc
            sm.add_to_obj(new_loc, loc_data)

    def _sort_locs(self):
        """
        Sort the locs by distance.

        :return:
        """
        self._locs = OrderedDict(
            (dist, self._locs[dist]) for dist in sorted(self._locs))

    def add_loc(self, x: float, loc):
        """Store `loc` at distance `x`, re-sort, and return the stored loc."""
        loc.x = x
        self._locs[x] = loc
        self._sort_locs()
        return self._locs[x]

    def add_loc_by_coords(self, coords, loc):
        """
        Store `loc` at the distance obtained by projecting `coords`
        onto the transect line; also sets the loc's offset and
        off_dir from the line.

        :raises ValueError: if the start or the end coordinates sum to zero
        """
        from liquepy.spatial import map_coords
        if not (sum(self.start) and sum(self.end)):
            raise ValueError("start and end coordinates must be set")
        line = self.tran_line
        loc.x = map_coords.calc_proj_line_dist(line, coords)
        loc.offset = map_coords.calc_line_offset(self.tran_line, coords)
        loc.off_dir = map_coords.calc_line_off_dir(self.tran_line, coords)
        self._locs[loc.x] = loc
        self._sort_locs()
        return self._locs[loc.x]

    def add_cpt_by_coords(self, cpt, coords, **kwargs):
        """Wrap `cpt` in a Loc and add it via add_loc_by_coords. Optional kwarg: esp."""
        loc = Loc(cpt=cpt, name=cpt.file_name, esp=kwargs.get("esp", None))
        loc.coords = coords
        return self.add_loc_by_coords(coords, loc)

    def add_cpt(self, cpt, x, **kwargs):
        """
        Wrap `cpt` in a Loc and add it at distance `x`.
        Optional kwargs: offset, off_dir (default "-"), esp.
        """
        loc = Loc(
            cpt=cpt,
            name=cpt.file_name,
            offset=kwargs.get("offset", None),
            off_dir=kwargs.get("off_dir", "-"),
            esp=kwargs.get("esp", None),
        )
        return self.add_loc(x, loc)

    # ------------------------------------------------------------
    # Locs: access
    # ------------------------------------------------------------

    def get_cpt_names(self):
        """Return the cpt_file_name of every loc, in stored order."""
        return [self.locs[dist].cpt_file_name for dist in self.locs]

    def get_loc_by_name(self, name):
        """Return the first loc whose name equals `name`, or None."""
        for candidate in self.locs.values():
            if candidate.name == name:
                return candidate

    def get_loc_by_dist(self, dist):
        return self.locs[dist]

    def loc(self, index):
        """Return the loc at 1-based position `index`."""
        index = int(index)
        if index == 0:
            raise KeyError("index=%i, but must be 1 or greater." % index)
        ordered_locs = list(self._locs.values())
        return ordered_locs[index - 1]

    def remove_loc(self, loc_int):
        """Delete the loc at 1-based position `loc_int`."""
        ordered_keys = list(self._locs.keys())
        del self._locs[ordered_keys[loc_int - 1]]

    def replace_loc(self, loc_int, soil):
        """Replace the loc at 1-based position `loc_int` with `soil`."""
        ordered_keys = list(self._locs.keys())
        self._locs[ordered_keys[loc_int - 1]] = soil

    def set_ids(self):
        """Number the locs 1..n in stored order; soil profiles get the same id."""
        for number, dist in enumerate(self.locs, start=1):
            current = self.locs[dist]
            current.id = number
            if current.soil_profile is not None:
                current.soil_profile.id = number

    def reset_cpt_folder_paths(self, folder_path):
        for current in self.locs.values():
            current.cpt_folder_path = folder_path

    # ------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------

    def to_dict(self, extra=(), **kwargs):
        """
        Return an OrderedDict of the serialised values of all names
        in `self.inputs` (when present) plus `extra`, leaving out "locs".
        """
        names = list(extra)
        if hasattr(self, "inputs"):
            names = list(self.inputs) + names

        skipped = ["locs"]
        outputs = OrderedDict()
        for item in names:
            if item in skipped:
                continue
            outputs[item] = sf.collect_serial_value(self.__getattribute__(item))
        return outputs

    def add_to_dict(self, models_dict, **kwargs):
        """
        Store this transect's dict under
        models_dict[base_type][unique_hash], then let every loc add
        itself with that entry passed as parent_dict.
        """
        if self.base_type not in models_dict:
            models_dict[self.base_type] = OrderedDict()
        models_dict[self.base_type][self.unique_hash] = self.to_dict(**kwargs)
        for loc_num in self.locs:
            parent = models_dict[self.base_type][self.unique_hash]
            self.locs[loc_num].add_to_dict(models_dict, parent_dict=parent)
def create_atlas(animal, create):
    """
    Place every 'atlasV7' structure volume into one uint8 volume
    sized from `animal`'s scan run, after aligning atlas centers
    to the MD589 centers.

    Steps, as performed below:
      1. Delete and recreate the neuroglancer 'atlas' output directory.
      2. Load each structure's volume and origin file, rotate/flip the
         volume and set voxels above `surface_threshold` to the
         structure's number.
      3. Compute a center for every structure, fit a rotation and a
         translation with align_point_sets() and save both as .npy
         files in the atlas directory.
      4. Transform each structure's center and add its volume (every
         second z plane) into the atlas volume.
      5. If `create` is truthy, write the volume as a neuroglancer
         precomputed segmentation.

    Progress and debug information is printed to stdout.

    :param animal: animal name; used for paths, the SqlController and
        the names of the saved rotation/translation files
    :param create: if truthy, write the neuroglancer output
    """
    fileLocationManager = FileLocationManager(animal)
    atlas_name = 'atlasV7'
    THUMBNAIL_DIR = os.path.join(ROOT_DIR, animal, 'preps', 'CH1', 'thumbnail')
    ATLAS_PATH = os.path.join(DATA_PATH, 'atlas_data', atlas_name)
    ORIGIN_PATH = os.path.join(ATLAS_PATH, 'origin')
    VOLUME_PATH = os.path.join(ATLAS_PATH, 'structure')
    OUTPUT_DIR = os.path.join(fileLocationManager.neuroglancer_data, 'atlas')
    # Start from an empty output directory: any previous output is removed.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Both listings are sorted so that zip() below pairs the i-th volume
    # file with the i-th origin file.
    origin_files = sorted(os.listdir(ORIGIN_PATH))
    volume_files = sorted(os.listdir(VOLUME_PATH))
    sqlController = SqlController(animal)
    resolution = sqlController.scan_run.resolution
    surface_threshold = 0.8
    SCALE = (10 / resolution)
    # {structure name: (volume, origin)}
    structure_volume_origin = {}
    for volume_filename, origin_filename in zip(volume_files, origin_files):
        structure = os.path.splitext(volume_filename)[0]
        # A volume file whose name does not occur in the paired origin file
        # name stops the loading loop; structures loaded so far are kept.
        if structure not in origin_filename:
            print(structure, origin_filename)
            break
        # The '_L' / '_R' parts are removed before the number lookup.
        color = get_structure_number(structure.replace('_L', '').replace('_R', ''))
        origin = np.loadtxt(os.path.join(ORIGIN_PATH, origin_filename))
        volume = np.load(os.path.join(VOLUME_PATH, volume_filename))
        volume = np.rot90(volume, axes=(0, 1))
        volume = np.flip(volume, axis=0)
        # Voxels above the threshold take the structure number; the cast to
        # uint8 happens afterwards.
        volume[volume > surface_threshold] = color
        volume = volume.astype(np.uint8)
        structure_volume_origin[structure] = (volume, origin)
    col_length = sqlController.scan_run.width/SCALE
    row_length = sqlController.scan_run.height/SCALE
    # One z plane per entry of the thumbnail directory.
    z_length = len(os.listdir(THUMBNAIL_DIR))
    atlasV7_volume = np.zeros(( int(row_length), int(col_length), z_length), dtype=np.uint8)
    print('atlas volume shape', atlasV7_volume.shape)
    ##### actual data for both sets of points, pixel coordinates
    centers = OrderedDict(MD589_centers)
    centers_list = []
    # Each center is stored as (second value, first value, third value),
    # with the first two divided by SCALE.
    for value in centers.values():
        centers_list.append((value[1]/SCALE, value[0]/SCALE, value[2]))
    COM = np.array(centers_list)
    atlas_com_centers = OrderedDict()
    atlas_all_centers = {}
    for structure, (volume, origin) in sorted(structure_volume_origin.items()):
        midcol, midrow, midz = origin
        row_start = midrow + row_length / 2
        col_start = midcol + col_length / 2
        z_start = midz / 2 + z_length / 2
        row_end = row_start + volume.shape[0]
        col_end = col_start + volume.shape[1]
        z_end = z_start + (volume.shape[2] + 1) / 2
        # The mid* names are re-bound to the midpoint of the start/end range.
        midcol = (col_end + col_start) / 2
        midrow = (row_end + row_start) / 2
        midz = (z_end + z_start) / 2
        # Only structures that also have an MD589 center take part in the fit;
        # all structures are recorded for the placement loop further down.
        if structure in centers.keys():
            atlas_com_centers[structure] = [midrow, midcol, midz]
        atlas_all_centers[structure] = [midrow, midcol, midz]
    ATLAS_centers = OrderedDict(atlas_com_centers)
    ATLAS = np.array(list(ATLAS_centers.values()))
    #### both sets of data are scaled to stack of DK52
    pprint(COM)
    pprint(ATLAS)
    #####Transform to auto align
    # NOTE(review): ATLAS rows follow sorted structure names and contain only
    # structures that were loaded, while COM rows follow the order of
    # MD589_centers and contain all of its entries. align_point_sets
    # presumably pairs points by position - confirm both arrays always have
    # the same length and order.
    r_auto, t_auto = align_point_sets(ATLAS.T, COM.T)
    rotationpath = os.path.join(ATLAS_PATH, f'atlas2{animal}.rotation.npy')
    np.save(rotationpath, r_auto)
    translatepath = os.path.join(ATLAS_PATH, f'atlas2{animal}.translation.npy')
    np.save(translatepath, t_auto)
    # Litao, look at the start and end for these structures, the x and y look good
    # but the z (section) is off
    debug = True
    for structure, (volume, origin) in sorted(structure_volume_origin.items()):
        print(str(structure).ljust(7),end=": ")
        source_point = np.array(atlas_all_centers[structure]) # get adjusted x,y,z from above loop
        results = (r_auto @ source_point + t_auto.T).reshape(1,3) # transform to fit
        x = results[0][1] # new x
        y = results[0][0] # new y
        z = results[0][2] # z
        # Shift by half of the volume's extent along its first two axes.
        x = x - volume.shape[0]/2
        y = y - volume.shape[1]/2
        x_start = int( round(x))
        y_start = int( round(y))
        # A quarter of the z extent, i.e. half of the halved stack used below.
        z_start = int(z - volume.shape[2]/4)
        x_end = int( round(x_start + volume.shape[0]))
        y_end = int( round(y_start + volume.shape[1]))
        z_end = int( round(z_start + (volume.shape[2] + 1) // 2))
        # NOTE(review): the original nesting of the statements between here and
        # z_indices could not be recovered with certainty. With debug fixed to
        # True, nesting the MD589 comparison inside or outside `if debug`
        # behaves the same.
        if debug:
            #print('volume shape', volume.shape, end=" ")
            print('COM row', str(int(y)).rjust(4), 'mid col', str(int(x)).rjust(4), 'mid z', str(int(z)).rjust(4), end=" ")
            print('Row range', str(y_start).rjust(4), str(y_end).rjust(4), 'col range', str(x_start).rjust(4), str(x_end).rjust(4), 'z range', str(z_start).rjust(4), str(z_end).rjust(4), end=" ")
            if structure in centers.keys():
                xo,yo,zo = MD589_centers[structure]
                print('COM off by:', round(x*SCALE - xo, 2), round(y*SCALE - yo, 2), round(z - zo, 2), end=" ")
        # Keep every second z plane (even indices), then swap the first two axes.
        z_indices = [z for z in range(volume.shape[2]) if z % 2 == 0]
        volume = volume[:, :, z_indices]
        volume = np.swapaxes(volume, 0, 1)
        # A shape mismatch between the target slice and the volume is reported
        # as 'Bad fit' and that structure is left out of the atlas volume.
        try:
            atlasV7_volume[y_start:y_end, x_start:x_end, z_start:z_end] += volume
        except ValueError as ve:
            print('Bad fit', end=" ")
        print()
    resolution = int(resolution * 1000 * SCALE)
    print('Shape of downsampled atlas volume', atlasV7_volume.shape)
    print('Resolution at', resolution)
    if create:
        atlasV7_volume = np.rot90(atlasV7_volume, axes=(0, 1))
        atlasV7_volume = np.fliplr(atlasV7_volume)
        atlasV7_volume = np.flipud(atlasV7_volume)
        atlasV7_volume = np.fliplr(atlasV7_volume)
        offset = [0,0,0]
        ng = NumpyToNeuroglancer(atlasV7_volume, [resolution, resolution, 20000], offset=offset)
        ng.init_precomputed(OUTPUT_DIR)
        ng.add_segment_properties(get_segment_properties())
        ng.add_downsampled_volumes()
        ng.add_segmentation_mesh()
        #outpath = os.path.join(ATLAS_PATH, f'{atlas_name}.tif')
        #io.imsave(outpath, atlasV7_volume.astype(np.uint8))
    # NOTE(review): `start` is not assigned anywhere in this function; it is
    # presumably a module-level timer() value set before the call - confirm.
    end = timer()
    print(f'Finito! Program took {end - start} seconds')
if "PARstart" in x_chr_dict[entry]: PAR_boolean_windows = ( df_dict["onlyX"][entry]["WINDOW"] * args.window_step >= x_chr_dict[entry]["PARstart"]) & ( df_dict["onlyX"][entry]["WINDOW"] * args.window_step + args.window_size <= x_chr_dict[entry]["PARend"]) df_dict["noPAR"][entry] = df_dict["onlyX"][entry][ ~PAR_boolean_windows] df_dict["PAR"][entry] = df_dict["onlyX"][entry][ PAR_boolean_windows] df_number_dict[entry] += 2 #if x_chr_dict: # df_dict["noX"][entry]["density"] = df_dict["noX"][entry]["All"] * args.multiplier / args.window_size # df_dict["onlyX"][entry]["density"] = df_dict["onlyX"][entry]["All"] * args.multiplier / args.window_size df_number_list = list(df_number_dict.values()) figure_height = 4 figure_width = max(1, int(sum(df_number_list) / 2)) dpi = 300 fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(figure_width, figure_height), dpi=dpi) fig.patch.set_color('white') data_list = [] label_list = [] x_pos_list = [] inner_distance = 1
class CourseSummaryStats(object):
    '''
    Holds the summary statistics of all offerings
    of a single class. Included: average percentages
    of responses across all offerings of one course.
    So there will be a single instance for each
    course name.

    Also included: a list of termcore numbers when
    the course was offered.
    '''

    #--------------------------------
    # Constructor CourseSummaryStats
    #------------------

    def __init__(self, course_stats_obj_list):
        '''
        Average the per-difficulty percentages over all
        given offerings of one course.

        @param course_stats_obj_list: one stats object per offering.
            Each must support ['crse_code'], ['termcore'], and
            percent_by_difficulty(<int 1..8>). The course name
            is taken from the first element, so the list must
            not be empty.
        @type course_stats_obj_list: [CourseStats]
        '''
        self.course_name = course_stats_obj_list[0]['crse_code']
        num_offerings = len(course_stats_obj_list)

        # Sum the percentage difficulties for each
        # difficulty for all offerings of the current course:
        level_keys = self._difficulty_level_keys()
        level_sums = [0] * len(level_keys)
        for course_stat_obj in course_stats_obj_list:
            for idx in range(len(level_keys)):
                level_sums[idx] += course_stat_obj.percent_by_difficulty(idx + 1)

        # Average percentage in each difficulty level, keyed
        # by the DIFF_LEVEL<n> constants, in level order:
        self.summary_dict = OrderedDict(
            (level_key, level_sum / num_offerings)
            for level_key, level_sum in zip(level_keys, level_sums)
        )

        self.num_offerings = num_offerings

        # List of termcores when course was offered
        self.termcores = [
            stats_obj['termcore'] for stats_obj in course_stats_obj_list
        ]

    #--------------------------------
    # _difficulty_level_keys
    #------------------

    @staticmethod
    def _difficulty_level_keys():
        '''
        Return the CourseStats.DIFF_LEVEL<n> constants for
        levels 1 through 8, in that order.
        '''
        return (
            CourseStats.DIFF_LEVEL1,
            CourseStats.DIFF_LEVEL2,
            CourseStats.DIFF_LEVEL3,
            CourseStats.DIFF_LEVEL4,
            CourseStats.DIFF_LEVEL5,
            CourseStats.DIFF_LEVEL6,
            CourseStats.DIFF_LEVEL7,
            CourseStats.DIFF_LEVEL8,
        )

    #--------------------------------
    # values
    #------------------

    def values(self):
        '''
        Return an iterator over summary percentage
        of each difficulty level. Order is guaranteed
        to be from level 1 to level 8.
        '''
        return self.summary_dict.values()

    #--------------------------------
    # __getitem__
    #------------------

    def __getitem__(self, key):
        '''
        If key is a DIFF_LEVEL<n> instance, return
        that difficulty level's summary response percentage.
        Else key must be one of
           o num_offerings
           o termcores
           o course_name

        The non-difficulty keys would likely be better done
        with properties. But this is very clear:

        @raise KeyError: if key is none of the above
        '''
        if isinstance(key, Interval):
            return self.summary_dict[key]
        elif key == 'num_offerings':
            return self.num_offerings
        elif key == 'termcores':
            return self.termcores
        elif key == 'course_name':
            return self.course_name
        else:
            raise KeyError("Key '%s' not in dict." % key)

    #--------------------------------
    # percent_by_difficulty
    #------------------

    def percent_by_difficulty(self, difficulty_level):
        '''
        Given either an integer difficulty level between
        1 and 8, or one of the instances CourseStats.DIFF_LEVEL<n>,
        return this course's average percentage across all
        offerings of responses that lie in that difficulty range.

        Returns None if difficulty_level matches none of the
        eight levels.

        @param difficulty_level: indicator of which difficulty level
            is wanted
        @type difficulty_level: {int | Interval}
        '''
        # summary_dict is keyed by the DIFF_LEVEL<n> constants
        # (see the constructor), so the lookup must use those
        # constants rather than string names:
        for level_num, level_key in enumerate(self._difficulty_level_keys(), start=1):
            if difficulty_level == level_num or difficulty_level == level_key:
                return self.summary_dict[level_key]
        return None
def contar_palabras(bs, link, num=3):
    """
    Count the words of an HTML page and store the most frequent
    ones in the `enlaces` collection.

    The page text is lower-cased, stripped of every character that
    is neither a word character nor whitespace, tokenized, and
    filtered against the Spanish stop-word list. The `num` most
    common remaining words are written to the document whose
    'direccion' equals `link`, as the fields 'palabra1'..'palabra<num>'
    (the words) and 'ranking1'..'ranking<num>' (their counts).

    If the page yields fewer than `num` distinct words, only the
    available ones are written; if it yields none, nothing is written.
    Any error is reported on stdout and not propagated.

    :param bs: parsed HTML of the page; must provide getText()
    :param link: address of the page; selects the document to update
    :param num: number of words to store (default 3)
    """
    try:
        # Turn the HTML into lower-case text.
        texto = bs.getText().lower()
        # Drop quotes, parentheses and other non-word characters.
        result = re.sub(r'[^\w\s]', '', texto)
        # Spanish stop words, as a set for fast membership tests.
        stop_words = set(stopwords.words('spanish'))
        # Split the text into words, leaving out punctuation tokens
        # and stop words.
        filtro = [
            palabra
            for palabra in word_tokenize(result)
            if palabra not in string.punctuation and palabra not in stop_words
        ]
        # Most frequent words first, as (word, count) pairs.
        mas_comunes = Counter(filtro).most_common(num)
        if not mas_comunes:
            return

        campos = {}
        for posicion, (palabra, repeticiones) in enumerate(mas_comunes, start=1):
            campos['palabra%d' % posicion] = palabra
            campos['ranking%d' % posicion] = repeticiones

        # Save all words and counts with a single update, so the
        # document is never left half-written.
        enlaces.update_one({'direccion': link}, {'$set': campos})
    except Exception as err:
        # Best effort: a page that cannot be processed must not stop
        # the caller. Report what went wrong instead of hiding it.
        print('Ha ocurrido un error con las palabras', err)
##### actual data for both sets of points
# Reference centers per structure, three values each.
MD589_centers = OrderedDict([
    ('5N_L', [23790, 13025, 160]),
    ('5N_R', [20805, 14163, 298]),
    ('7n_L', [20988, 18405, 177]),
    ('7n_R', [24554, 13911, 284]),
    ('DC_L', [24482, 11985, 134]),
    ('DC_R', [20424, 11736, 330]),
    ('LC_L', [25290, 11750, 180]),
    ('LC_R', [24894, 12079, 268]),
    ('SC', [24226, 6401, 220]),
])

# One row per structure: (second value / SCALE, first value / SCALE,
# third value unchanged).
MD589_list = []
for value in MD589_centers.values():
    scaled_second = value[1] / SCALE
    scaled_first = value[0] / SCALE
    MD589_list.append((scaled_second, scaled_first, value[2]))
MD589 = np.array(MD589_list)

# Atlas centers for the same structure names.
atlas_centers = {
    '5N_L': [686.53, 990.08, 155.38],
    '5N_R': [686.53, 990.08, 292.62],
    '7n_L': [725.04, 1034.44, 172.21],
    '7n_R': [725.04, 1034.44, 275.79],
    'DC_L': [806.29, 955.16, 130.35],
    'DC_R': [806.29, 955.16, 317.65],
    'LC_L': [731.55, 934.49, 182.33],
    'LC_R': [731.55, 934.49, 265.67],
    'SC': [602.87, 757.7, 225.5],
}
def _report_textual_results(self, tally_coll, res_dir):
    '''
    Given a collection of tallies with results from a series
    of batches, gather the predictions and the labels of all
    testing-phase tallies into two long lists.

    Computes information retrieval type values:
         precision (macro/micro/weighted/by-class)
         recall    (macro/micro/weighted/by-class)
         f1        (macro/micro/weighted/by-class)
         accuracy
         balanced_accuracy

    Combines these results into a Pandas series,
    and writes them to a csv file. That file is constructed
    from the passed-in res_dir, appended with 'ir_results.csv'.

    Finally, constructs Github flavored tables from the
    above results, and posts them to the 'text' tab of
    tensorboard.

    Returns the results measures Series

    :param tally_coll: collection of tallies from batches
    :type tally_coll: ResultCollection
    :param res_dir: directory where all .csv and other
        result files are to be written
    :type res_dir: str
    :return results of information retrieval-like measures
    :rtype: pandas.Series
    '''

    all_preds = []
    all_labels = []

    for tally in tally_coll.tallies(phase=LearningPhase.TESTING):
        all_preds.extend(tally.preds)
        all_labels.extend(tally.labels)

    # (key prefix, scoring function); the order here fixes
    # the order of the entries in the result Series:
    scorers = (
        ('prec', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    # (key suffix, value passed as 'average'). average=None
    # yields one value per class:
    averagings = (
        ('macro', 'macro'),
        ('micro', 'micro'),
        ('weighted', 'weighted'),
        ('by_class', None),
    )

    res = OrderedDict()
    for prefix, scorer in scorers:
        for suffix, average in averagings:
            res['%s_%s' % (prefix, suffix)] = scorer(all_labels,
                                                     all_preds,
                                                     average=average,
                                                     zero_division=0)

    res['accuracy'] = accuracy_score(all_labels, all_preds)
    res['balanced_accuracy'] = balanced_accuracy_score(all_labels, all_preds)

    res_series = pd.Series(list(res.values()),
                           index=list(res.keys()))

    # Write information retrieval type results
    # to a one-line .csv file, using pandas Series
    # as convenient intermediary:
    res_csv_path = os.path.join(res_dir, 'ir_results.csv')
    res_series.to_csv(res_csv_path)

    res_rnd = {}
    for meas_nm, meas_val in res.items():
        # Measure results are either floats (precision, recall, etc.),
        # or np arrays (e.g. precision-per-class). For both
        # cases, round each measure to one digit. isinstance()
        # also covers float subclasses:
        if isinstance(meas_val, float):
            res_rnd[meas_nm] = round(meas_val, 1)
        else:
            res_rnd[meas_nm] = meas_val.round(1)

    prefixes = [prefix for prefix, _scorer in scorers]

    # One row per averaging method; columns precision, recall, f1:
    ir_measures_skel = {
        'col_header': ['precision', 'recall', 'f1'],
        'row_labels': ['macro', 'micro', 'weighted'],
        'rows': [
            [res_rnd['%s_%s' % (prefix, avg)] for prefix in prefixes]
            for avg in ('macro', 'micro', 'weighted')
        ]
    }

    # One row per class; columns precision, recall, f1:
    ir_per_class_rows = [
        [prec_class, recall_class, f1_class]
        for prec_class, recall_class, f1_class
        in zip(res_rnd['prec_by_class'],
               res_rnd['recall_by_class'],
               res_rnd['f1_by_class'])
    ]
    ir_by_class_skel = {
        'col_header': ['precision', 'recall', 'f1'],
        'row_labels': self.class_names,
        'rows': ir_per_class_rows
    }

    accuracy_skel = {
        'col_header': ['accuracy', 'balanced_accuracy'],
        'row_labels': ['Overall'],
        'rows': [[res_rnd['accuracy'], res_rnd['balanced_accuracy']]]
    }

    ir_measures_tbl = GithubTableMaker.make_table(ir_measures_skel,
                                                  sep_lines=False)
    ir_by_class_tbl = GithubTableMaker.make_table(ir_by_class_skel,
                                                  sep_lines=False)
    accuracy_tbl = GithubTableMaker.make_table(accuracy_skel,
                                               sep_lines=False)

    # Write the markup tables to Tensorboard:
    self.writer.add_text('Information retrieval measures',
                         ir_measures_tbl,
                         global_step=0)
    self.writer.add_text('Per class measures',
                         ir_by_class_tbl,
                         global_step=0)
    self.writer.add_text('Accuracy', accuracy_tbl, global_step=0)

    return res_series
class Experiment(Observable):
    """
    This class represents an experiment: a data set, per-attribute settings,
    the label attribute, slots with features selectors and classifiers,
    the evaluation method and (optionally) the results.
    """

    DEFAULT_FEATURE_EXTRACTOR_NAME = "Pass"
    """Name of default feature extractor that is set to attribute. If exists."""

    class AttributeSettings(Enum):
        """
        Possible settings types that could be set to an attribute.
        """
        USE = 0
        PATH = 1
        FEATURE_EXTRACTOR = 2
        LABEL = 3

    def __init__(self, filePath: str = None):
        """
        Creation of new experiment or loading of saved.

        :param filePath: Path to file. If None than new experiment is created, else
            saved experiment is loaded.
        :type filePath: str | None
        :raise RuntimeError: When there is a problem with plugins.
        :raise ExperimentLoadException: When there is a problem with loading.
        """
        super().__init__()
        self._dataset = None
        self._attributesSet = {}
        self._label = None
        self._featuresSele = []
        self._classifiers = []  # classifiers for testing
        self._evaluationMethod = None
        self.loadSavePath = None  # stores path from which this exp was loaded or where is saved
        self.results = None

        # let's load the plugins that are now available
        # must be called before experiment loading
        # because sets default values
        self._loadPlugins()

        if filePath is not None:
            # load saved experiment
            self._load(filePath)
            self.loadSavePath = filePath

        self._dataStats = None
        self._origDataStats = None
        self._attributesThatShouldBeUsedCache = {}

    def save(self, filePath):
        """
        Saves experiment configuration to given file.

        :param filePath: Path to experiment file.
        :type filePath: str
        """
        with open(filePath, "wb") as saveF:
            # let's create Experiment version for saving
            data = {
                "dataSet": self._dataset,
                "attributesSet": self._attributesSet,
                "label": self._label,
                "featuresSele": self._featuresSele,
                "classifiers": self._classifiers,
                "evaluationMethod": self._evaluationMethod,
                "results": self.results
            }
            # save it
            pickle.dump(data, saveF)

        self.loadSavePath = filePath
        LastUsedExperiments().used(filePath)

    def setResults(self, r):
        """
        Sets results. Suitable for use as callback.

        :param r: new results.
        :type r: Results
        """
        self.results = r

    def _load(self, filePath):
        """
        Loads saved experiment configuration from given file.

        The whole content is validated first and only then assigned, so a
        failed load does not leave the experiment half-updated.

        :param filePath: Path to experiment file.
        :type filePath: str
        :raise ExperimentLoadException: When there is a problem with loading.
        """
        errMsg = "Couldn't load given experiment."

        with open(filePath, "rb") as loadF:
            try:
                # WARNING: unpickling can execute arbitrary code, so only
                # experiment files from a trusted source should be opened.
                lE = pickle.load(loadF)
            except Exception as e:
                raise ExperimentLoadException(errMsg) from e

        if not isinstance(lE, dict):
            raise ExperimentLoadException(errMsg)

        # check that we have loaded all attributes
        # ("results" is optional, a file without it is loaded with no results)
        required = ("dataSet", "attributesSet", "label", "featuresSele",
                    "classifiers", "evaluationMethod")
        if any(a not in lE for a in required):
            raise ExperimentLoadException(errMsg)

        if not isinstance(lE["dataSet"], DataSet):
            raise ExperimentLoadException(errMsg)

        if not isinstance(lE["attributesSet"], dict):
            raise ExperimentLoadException(errMsg)

        if lE["label"] is not None and not isinstance(lE["label"], str):
            raise ExperimentLoadException(errMsg)

        # Both banks are lists of PluginSlot objects (that is what
        # _addPluginSlot appends and what save() writes).
        for bankName in ("featuresSele", "classifiers"):
            bank = lE[bankName]
            if not isinstance(bank, list) or \
                    any(not isinstance(slot, PluginSlot) for slot in bank):
                raise ExperimentLoadException(errMsg)

        if not isinstance(lE["evaluationMethod"], Validator):
            raise ExperimentLoadException(errMsg)

        results = lE.get("results")
        if results is not None and not isinstance(results, Results):
            raise ExperimentLoadException(errMsg)

        # everything is valid, let's use it
        self._dataset = lE["dataSet"]
        self._attributesSet = lE["attributesSet"]
        self._label = lE["label"]
        self._featuresSele = lE["featuresSele"]
        self._classifiers = lE["classifiers"]
        self._evaluationMethod = lE["evaluationMethod"]
        self.results = results

        LastUsedExperiments().used(filePath)

    def useDataSubset(self):
        """
        Use only defined subset of data.
        Subset is defined by selected samples.
        Samples are selected according to constraints defined in dataStats.
        """
        self._dataset.useSubset(None)  # clear the old one
        if self._dataStats is not None:
            subset = np.empty(self._dataStats.numberOfSamples)
            # per class budget of samples that still may be selected
            counters = copy.copy(self._dataStats.classSamples)
            cnt = 0
            for i, sample in enumerate(self._dataset):
                sampleClass = sample[self._label]
                try:
                    remaining = counters[sampleClass]
                except KeyError:
                    # probably class that we want to omit
                    continue

                if remaining > 0:
                    counters[sampleClass] -= 1
                    subset[cnt] = i
                    cnt += 1

            # np.empty does not initialize the buffer, so pass only
            # the part that was really filled with sample indices.
            self.dataset.useSubset(subset[:cnt])

    @property
    def dataStats(self):
        """
        The data stats. Working copy of original data stats.

        :return: Actual stats.
        :rtype: ExperimentDataStatistics | None
        """
        return self._dataStats

    @property
    def origDataStats(self):
        """
        Original data stats. Maybe you are looking for working copy
        of data stats that you can get with dataStats.

        :return: Original data stats.
        :rtype: ExperimentDataStatistics | None
        """
        return self._origDataStats

    @Observable._event("NEW_DATA_STATS")
    def setDataStats(self, stats, actOnly=False):
        """
        Set the data stats. This method overrides working copy
        and original data stats.

        :param stats: New stats.
        :type stats: ExperimentDataStatistics
        :param actOnly: If true than overrides only working copy.
            If false than overrides original data too.
            If no original data was set (origData is None) than
            this parameter is ignored and origData is set too.
        :type actOnly: bool
        """
        self._dataStats = copy.deepcopy(stats)
        if self._origDataStats is None or not actOnly:
            self._origDataStats = stats
        else:
            # We must add classes that were filtered out.
            classSamples = self._dataStats.classSamples
            deactivate = []
            for c in self._origDataStats.classes:
                if c not in classSamples:
                    # we set the max, but we must deactivate it
                    # The max is set because if user will decide
                    # that she/he wants to use this class, than
                    # we must set some initial number of samples.
                    classSamples[c] = self._origDataStats.classSamples[c]
                    deactivate.append(c)

            self._dataStats.classSamples = classSamples
            # lets deactivate it
            for c in deactivate:
                self._dataStats.deactivateClass(c)

    def _loadPlugins(self):
        """
        Loads available plugins.
        Adds default.

        :raise RuntimeError: When there is problem with plugins.
        """
        # available features extractors
        if len(FEATURE_EXTRACTORS) == 0:
            raise RuntimeError("There are no features extractors plugins.")

        feTmp = {}
        for fe in FEATURE_EXTRACTORS.values():
            if fe.getName() in feTmp:
                # wow, name collision
                raise RuntimeError(
                    "Collision of features extractors names. For name: " +
                    fe.getName())
            feTmp[fe.getName()] = fe

        # lets put the default feature extractor as the first if exists
        if self.DEFAULT_FEATURE_EXTRACTOR_NAME in feTmp:
            cont = [(self.DEFAULT_FEATURE_EXTRACTOR_NAME,
                     feTmp[self.DEFAULT_FEATURE_EXTRACTOR_NAME])]
            # add the rest
            cont += [(n, p) for n, p in feTmp.items()
                     if n != self.DEFAULT_FEATURE_EXTRACTOR_NAME]
            self._featuresExt = OrderedDict(cont)
        else:
            self._featuresExt = OrderedDict(feTmp)

        # available classifiers
        if len(CLASSIFIERS) == 0:
            raise RuntimeError("There are no classifiers plugins.")

        clsTmp = set()
        for cls in CLASSIFIERS.values():
            if cls.getName() in clsTmp:
                # wow, name collision
                raise RuntimeError(
                    "Collision of classifiers names. For name: " +
                    cls.getName())
            clsTmp.add(cls.getName())

        # available Validators
        self.availableEvaluationMethods = getAllSubclasses(Validator)
        self._evaluationMethod = self.availableEvaluationMethods[0]()  # add default

        # available Features selectors
        self.availableFeatureSelectors = getAllSubclasses(FeaturesSelector)

    @property
    def featuresSelectors(self):
        """
        Features selectors for feature selecting.
        """
        return [s.plugin for s in self._featuresSele]

    @property
    def featuresSelectorsSlots(self):
        """
        All used features selectors slots.
        """
        return self._featuresSele

    @property
    def classifiersSlots(self):
        """
        All currently used classifiers slots.
        """
        return self._classifiers

    @property
    def classifiers(self):
        """
        Classifiers for testing.
        """
        return [s.plugin for s in self._classifiers]

    def newClassifierSlot(self):
        """
        Creates new slot for classifier that should be tested.

        :return: Classifier slot
        :rtype: PluginSlot
        """
        return self._addPluginSlot(self._classifiers)

    def removeClassifierSlot(self, slot: PluginSlot):
        """
        Remove classifier slot.

        :param slot: Slot for classifier.
        :type slot: PluginSlot
        """
        self._removePluginSlot(self._classifiers, slot)

    def newFeaturesSelectorSlot(self):
        """
        Creates new slot for features selector that should be tested.

        :return: Features selector slot
        :rtype: PluginSlot
        """
        return self._addPluginSlot(self._featuresSele)

    def removeFeaturesSelectorSlot(self, slot: PluginSlot):
        """
        Remove features selector slot.

        :param slot: Slot for features selector.
        :type slot: PluginSlot
        """
        self._removePluginSlot(self._featuresSele, slot)

    def _addPluginSlot(self, bank):
        """
        Creates new slot in given slot bank.

        :param bank: Slot bank
        :type bank: List[PluginSlot]
        :return: New slot
        :rtype: PluginSlot
        """
        # new id is one bigger than the biggest used id (0 for empty bank)
        slotId = max((p.id for p in bank), default=-1) + 1
        bank.append(PluginSlot(slotId))
        return bank[-1]

    def _removePluginSlot(self, bank: List[PluginSlot], slot: PluginSlot):
        """
        Removes slot from given slot bank.

        :param bank: Slot bank
        :type bank: List[PluginSlot]
        :param slot: Slot that should be removed.
        :type slot: PluginSlot
        :raise ValueError: When the slot is not in the bank.
        """
        bank.remove(slot)

    @property
    def availableClassifiers(self):
        """
        Available classifiers plugins.
        """
        return CLASSIFIERS

    @property
    def featuresExt(self):
        """
        Available features extractors plugins.
        Stored in OrderedDict (name -> plugin). Because it is handy to have default extractor as first (if exists).
        """
        return self._featuresExt

    @Observable._event("NEW_DATA_SET")
    def loadDataset(self, filePath: str):
        """
        Loads dataset.

        Attribute settings, label, data stats and the cache of used
        attributes are reset for the new data set.

        :param filePath: Path to file with dataset.
        :type filePath: str
        """
        self._dataset = DataSet(filePath)

        # prepare new attribute settings
        # (each attribute gets its own instance of the first feature extractor)
        self._attributesSet = {
            name: {
                self.AttributeSettings.USE: True,
                self.AttributeSettings.PATH: False,
                self.AttributeSettings.FEATURE_EXTRACTOR:
                    next(iter(self._featuresExt.values()))()
            }
            for name in self._dataset.attributes
        }
        self._label = None
        self._dataStats = None
        self._attributesThatShouldBeUsedCache = {}

    @property
    def evaluationMethod(self):
        """
        Validator used for evaluation.
        """
        return self._evaluationMethod

    @evaluationMethod.setter
    def evaluationMethod(self, val):
        """
        Validator used for evaluation.

        :param val: Validator or name of validator class.
            If name of validator is provided than new object of it's corresponding class is created.
        :type val: str | Validator
        :raise ValueError: When invalid value is given (unknown name).
        """
        if isinstance(val, Validator):
            self._evaluationMethod = val
            return

        # self.availableEvaluationMethods is a list because we want to preserve order and therefore
        # we have no other choice than to iterate over it and find the right by name.
        for v in self.availableEvaluationMethods:
            if v.getName() == val:
                self._evaluationMethod = v()
                return

        # str() guarantees the documented ValueError even for non-string values
        raise ValueError("Unknown Validator name: " + str(val))

    def setEvaluationMethod(self, val):
        """
        Same as evaluationMethod but can be used as callable

        :param val: Validator or name of validator class.
            If name of validator is provided than new object of it's corresponding class is created.
        :type val: str | Validator
        :raise ValueError: When invalid value is given (unknown name).
        """
        self.evaluationMethod = val

    @property
    def label(self):
        """
        Attribute name that is set as label or None.
        """
        return self._label

    def getAttributeSetting(self, attribute: str, t):
        """
        Get attribute setting of given type.

        :param attribute: The attribute.
        :type attribute: str
        :param t: The setting type.
        :type t: Experiment.AttributeSettings
        :return: The setting value. For LABEL it is a bool that says
            whether the attribute is the current label.
        """
        if t == Experiment.AttributeSettings.LABEL:
            return self._label == attribute

        return self._attributesSet[attribute][t]

    @Observable._event("ATTRIBUTES_CHANGED")
    def attributesChangedEvent(self):
        """
        This event exists for informing observers that some attribute is no longer used or started to be used or
        when attribute is marked as label.
        """
        pass

    def setAttributeSetting(self, attribute: str, t, val):
        """
        Set attribute setting of given type.

        :param attribute: The attribute.
        :type attribute: str
        :param t: The setting type.
        :type t: Experiment.AttributeSettings
        :param val: New value. For setting new label val must be true, because if you pass false than
            label will be set to None.
        :type val: bool | Plugin
        :raise KeyError: When the name of attribute is unknown.
        """
        settings = Experiment.AttributeSettings

        if t == settings.LABEL:
            self._label = attribute if val else None
            # setting new label invalidates data stats
            self.setDataStats(None)
        else:
            self._attributesSet[attribute][t] = val

        if t == settings.PATH:
            # we must inform the data set object
            if val:
                self._dataset.addPathAttribute(
                    attribute,
                    self._attributesSet[attribute][settings.FEATURE_EXTRACTOR].expDataType())
            else:
                self._dataset.removePathAttribute(attribute)

        if t == settings.FEATURE_EXTRACTOR and \
                attribute in self._dataset.pathAttributes:
            # we must inform the data set object
            self._dataset.addPathAttribute(attribute, val.expDataType())

        if t == settings.USE or t == settings.LABEL:
            # set of used attributes changed, so the cache is no longer valid
            self._attributesThatShouldBeUsedCache = {}
            self.attributesChangedEvent()

    def attributesThatShouldBeUsed(self, label: bool = True):
        """
        Names of attributes that should be used.

        :param label: True means that label attribute should be among them.
        :type label: bool
        :return: Names of used attributes in original attribute order.
            The returned list is cached, so it should not be modified.
        :rtype: List[str]
        """
        # we are preserving original attribute order
        try:
            return self._attributesThatShouldBeUsedCache[label]
        except KeyError:
            res = [a for a in self.dataset.attributes
                   if self._attributesSet[a][Experiment.AttributeSettings.USE]
                   and (label or a != self._label)]
            self._attributesThatShouldBeUsedCache[label] = res
            return res

    @property
    def dataset(self):
        """
        Loaded dataset.
        """
        return self._dataset
# Bucket tables: (inclusive lower bound, label), sorted by ascending bound.
# A bucket ends where the next one starts; the last one is open-ended.
_PERIOD_BUCKETS = (
    (1960, '1960-1969'),
    (1970, '1970-1979'),
    (1980, '1980-1989'),
    (1990, '1990-1999'),
    (2000, '2000-2009'),
    (2010, '2010-2019'),
    (2020, '2020+'),
)
_EPISODE_BUCKETS = (
    (1, '1-19'),
    (20, '20-49'),
    (50, '50-99'),
    (100, '100-149'),
    (150, '150-199'),
    (200, '200-299'),
    (300, '300-399'),
    (400, '400-499'),
    (500, '500+'),
)
# The first bucket has no lower limit: everything below 60 lands in '<1h'.
_RUNTIME_BUCKETS = (
    (float('-inf'), '<1h'),
    (60, '1h-1h29'),
    (90, '1h30-1h59'),
    (120, '2h00-2h29'),
    (150, '2h30-2h59'),
    (180, '3h+'),
)


def _empty_counters(buckets):
    """Return an OrderedDict with one zeroed counter per bucket label."""
    return OrderedDict((label, 0) for _, label in buckets)


def _count_in_bucket(counters, buckets, value):
    """Increment the counter of the bucket containing ``value``.

    A value below the first lower bound belongs to no bucket and is ignored.
    """
    matched = None
    for lower_bound, label in buckets:
        if value < lower_bound:
            break
        matched = label
    if matched is not None:
        counters[matched] += 1


def _airing_year(media):
    """Return the year parsed from ``first_air_date`` or ``release_date``.

    The dates are expected to start with the year followed by ``-``.
    Returns 0 when neither attribute yields a usable year.
    """
    for attr in ('first_air_date', 'release_date'):
        try:
            return int(getattr(media, attr).split('-')[0])
        except (AttributeError, ValueError, TypeError):
            # attribute missing, not a string, or not starting with a number
            continue
    return 0


def get_more_stats(user):
    """Compute genre, period and episode/runtime statistics of a user's lists.

    The series, anime and movies lists of ``user`` are queried and, for each
    of them, the following values are stored in the returned dict (``<Media>``
    being ``Series``, ``Anime`` or ``Movies``):

    - ``<Media>_genres``: list of ``(genre, time_watched)`` tuples sorted by
      descending time, or ``{}`` when every genre has zero time.
    - ``<Media>_periods``: OrderedDict decade label -> number of media,
      or ``{}`` when all counters are zero.
    - ``Series_episodes`` / ``Anime_episodes``: OrderedDict episode-range
      label -> number of media, or ``{}`` when all counters are zero.
    - ``Movies_times``: OrderedDict runtime-range label -> number of movies,
      or ``{}`` when all counters are zero.

    :param user: the user whose lists are analysed; only ``user.id`` is read.
    :return: dict with the keys described above.
    """

    def get_episodes_and_time(element):
        """Return ``[episodes_watched, time_watched]`` for a (media, list entry) row."""
        media, entry = element[0], element[1]

        if entry.status in (Status.COMPLETED, Status.COMPLETED_ANIMATION):
            try:
                # Media exposing ``runtime`` count as one episode.
                return [1, media.runtime]
            except AttributeError:
                # No ``runtime``: the media is made of episodes.
                return [
                    media.total_episodes,
                    int(media.episode_duration) * media.total_episodes
                ]

        if entry.status not in (Status.PLAN_TO_WATCH, Status.RANDOM):
            # In progress: all episodes of the seasons before the current
            # one plus the episodes watched in the current season.
            nb_episodes = [m.episodes for m in media.eps_per_season]
            previous_seasons = sum(
                int(nb_episodes[i]) for i in range(entry.current_season - 1))
            episodes_watched = previous_seasons + entry.last_episode_watched
            return [episodes_watched,
                    int(media.episode_duration) * episodes_watched]

        return [0, 0]

    series_data = db.session.query(Series, SeriesList) \
        .join(SeriesList, SeriesList.media_id == Series.id) \
        .filter(SeriesList.user_id == user.id)

    anime_data = db.session.query(Anime, AnimeList) \
        .join(AnimeList, AnimeList.media_id == Anime.id) \
        .filter(AnimeList.user_id == user.id)

    movies_data = db.session.query(Movies, MoviesList) \
        .join(MoviesList, MoviesList.media_id == Movies.id) \
        .filter(MoviesList.user_id == user.id)

    media_data = [series_data, anime_data, movies_data]
    prefixes = ['Series', 'Anime', 'Movies']

    data = {}
    for index, media in enumerate(media_data):
        is_movies = index == 2

        genres_time = {}
        periods_time = _empty_counters(_PERIOD_BUCKETS)
        episodes_time = _empty_counters(_EPISODE_BUCKETS)
        movies_time = _empty_counters(_RUNTIME_BUCKETS)

        for element in media:
            # Number of episodes and the time watched by element
            episodes_watched, time_watched = get_episodes_and_time(element)

            # Genres stats
            for genre_row in element[0].genres:
                genre = genre_row.genre
                genres_time[genre] = genres_time.get(genre, 0) + time_watched

            # Period stats (years before 1960, or unknown, are not counted)
            _count_in_bucket(periods_time, _PERIOD_BUCKETS,
                             _airing_year(element[0]))

            # Episodes / time stats
            if is_movies:
                # NOTE(review): entries with zero watched time (e.g. the
                # [0, 0] returned for plan-to-watch) are counted in '<1h',
                # as before - confirm that this is intended.
                _count_in_bucket(movies_time, _RUNTIME_BUCKETS, time_watched)
            else:
                # Elements without any watched episode are not counted.
                _count_in_bucket(episodes_time, _EPISODE_BUCKETS,
                                 episodes_watched)

        # Rename
        if index == 0:
            genres_time['Action/Adventure'] = genres_time.pop(
                'Action & Adventure', 0)
            genres_time['War/Politics'] = genres_time.pop('War & Politics', 0)
            genres_time['Sci-Fi/Fantasy'] = genres_time.pop(
                'Sci-Fi & Fantasy', 0)
        genres_time.pop('Unknown', 0)

        if all(x == 0 for x in genres_time.values()):
            genres_time = {}
        else:
            genres_time = sorted(genres_time.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)

        if all(x == 0 for x in periods_time.values()):
            periods_time = {}
        if all(x == 0 for x in episodes_time.values()):
            episodes_time = {}
        if all(x == 0 for x in movies_time.values()):
            movies_time = {}

        prefix = prefixes[index]
        data[prefix + '_genres'] = genres_time
        data[prefix + '_periods'] = periods_time
        if is_movies:
            data['Movies_times'] = movies_time
        else:
            data[prefix + '_episodes'] = episodes_time

    return data