예제 #1
0
 def keep_latest_dict(self, load_event_dicts):
     '''
     Reduce a list of table-refresh records to the newest
     record per table.

     Every record must have a 'tbl_name' key. Records are
     compared by their 'time_refreshed' value; for each table
     name only the record with the greatest value survives.
     Tables appear in the result in the order in which they
     were first encountered in the input.

     @param load_event_dicts: array of dict describing table
         refresh events.
     @type load_event_dicts: [{}]
     @return: one dict per distinct table name
     @rtype: [{}]
     '''
     # {tbl_name : newest record seen so far}; ordered so the
     # result follows the first-seen order of the tables:
     newest_by_table = OrderedDict()
     for event in load_event_dicts:
         table = event['tbl_name']
         try:
             supersedes = (event['time_refreshed'] >
                           newest_by_table[table]['time_refreshed'])
         except KeyError:
             # Nothing stored for this table yet (or one of the
             # two records has no 'time_refreshed' entry):
             # the current record wins.
             supersedes = True
         if supersedes:
             newest_by_table[table] = event

     return list(newest_by_table.values())
    class Nodes(object):
        """
        Three ordered mappings of nodes: input, intermediate and output.

        Membership tests and item lookup consult the intermediate
        mapping first, then input, then output. Iteration yields the
        stored values in the order input, intermediate, output.
        """

        def __init__(self):
            self.input_nodes = OrderedDict()
            self.intermediate_nodes = OrderedDict()
            self.output_nodes = OrderedDict()

        def __contains__(self, item: nodes.Node):
            """Return True if `item` is a key of any of the three mappings."""
            return (item in self.intermediate_nodes
                    or item in self.input_nodes
                    or item in self.output_nodes)

        def __getitem__(self, item: nodes.Node):
            """
            Return the value stored under `item` in the first mapping
            (intermediate, input, output) that has it as a key.

            :raises KeyError: if none of the mappings contains `item`.
            """
            search_order = (self.intermediate_nodes,
                            self.input_nodes,
                            self.output_nodes)
            for group in search_order:
                if item in group:
                    return group[item]
            raise KeyError

        def __iter__(self):
            """Yield all stored values: inputs, then intermediates, then outputs."""
            yield from self.input_nodes.values()
            yield from self.intermediate_nodes.values()
            yield from self.output_nodes.values()

        def __str__(self):
            """Return every node's string form, each preceded by a newline."""
            return ''.join(f"\n{node}" for node in self)
예제 #3
0
class TwoLayerNet:
    """
    Network made of the layers Affine1 -> Relu1 -> Affine2, followed by a
    SoftmaxWithLoss layer that is used for the loss only.

    The parameters live in ``self.params`` under the keys 'W1', 'b1',
    'W2' and 'b2'; the same arrays are handed to the Affine layers.
    ``Affine``, ``Relu``, ``SoftmaxWithLoss`` and the module-level
    ``numerical_gradient`` function are defined elsewhere.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        """
        :param input_size: number of rows of W1
        :param hidden_size: number of columns of W1 / rows of W2
        :param output_size: number of columns of W2
        :param weight_init_std: standard deviation of the zero-mean
            Gaussian the initial weights are drawn from
        """
        self.params = {}
        # randn (standard normal), not rand (uniform on [0, 1)): only a
        # zero-mean draw makes `weight_init_std` an actual standard
        # deviation and gives weights of both signs.
        self.params['W1'] = weight_init_std * np.random.randn(
            input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Ordered so that predict() can run the layers front to back and
        # gradient() can run them back to front.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Pass `x` through every layer in ``self.layers`` and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``lastLayer.forward`` applied to the prediction for `x` and to `t`."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """
        Return the fraction of rows of `x` whose arg-max prediction equals
        the label in `t`.

        `t` may be one-dimensional, or have one row per sample, in which
        case the arg-max of each row is taken as the label.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """
        Return a dict with the gradients for 'W1', 'b1', 'W2' and 'b2' as
        computed by the module-level ``numerical_gradient`` function.
        """

        def loss_W(W):
            # The argument is ignored: the loss reads the parameter arrays
            # through the layers, which share them with self.params.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """
        Return a dict with the gradients for 'W1', 'b1', 'W2' and 'b2' read
        from the Affine layers after one forward and one backward pass.
        """
        # Forward pass; the layers keep whatever they need for backward().
        self.loss(x, t)

        # Backward pass, starting at the loss layer.
        dout = 1
        dout = self.lastLayer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads
예제 #4
0
class Transect(sm.CustomObject):
    """
    A line between a start and an end coordinate that holds ``Loc``
    objects keyed by their distance ``x`` along the line.

    ``add_loc`` and ``add_loc_by_coords`` keep the mapping sorted by that
    distance.
    """
    base_type = "transect"
    type = "transect"
    datum = "top of face"

    def __init__(self, **kwargs):
        """
        Create a transect without locations.

        :param kwargs: optional settings: ``name``; ``start`` and ``end``
            as (lat, lon) pairs, each defaulting to (0, 0); ``h_face``;
            ``av_ground_slope``.
        """
        super(Transect, self).__init__()
        self._locs = OrderedDict()
        self.name = kwargs.get("name", None)
        start = kwargs.get("start", (0, 0))  # coords (lat, long)
        end = kwargs.get("end", (0, 0))
        self.s_coords = Coords(lat=start[0], lon=start[1])
        self.e_coords = Coords(lat=end[0], lon=end[1])
        self.ug_values = []
        self.ug_xs = []
        self.h_face = kwargs.get("h_face", None)
        self.av_ground_slope = kwargs.get("av_ground_slope", None)
        self._extra_class_inputs = [
            "locs", "start", "end", "ug_values", "ug_xs", "h_face",
            "av_ground_slope", "datum"
        ]
        # `self.inputs` is read before it is assigned here, so it is
        # presumably created by the base-class constructor.
        self.inputs = self.inputs + self._extra_class_inputs

    # ------------------------------------------------------------------
    # End points
    # ------------------------------------------------------------------

    @property
    def start(self):
        """Start coordinates, as returned by ``s_coords.as_tuple``."""
        return self.s_coords.as_tuple

    @start.setter
    def start(self, values):
        self.s_coords = Coords(lat=values[0], lon=values[1])

    @property
    def end(self):
        """End coordinates, as returned by ``e_coords.as_tuple``."""
        return self.e_coords.as_tuple

    @end.setter
    def end(self, values):
        self.e_coords = Coords(lat=values[0], lon=values[1])

    @property
    def tran_line(self):
        """
        A ``liquepy.spatial.map_coords.Line`` from start to end, or None
        (after two warnings) if an ImportError occurs.
        """
        try:
            from liquepy.spatial.map_coords import Line
            return Line(self.s_coords, self.e_coords)
        except ImportError as e:
            warnings.warn('Need to import spatial packages', stacklevel=3)
            # Pass text, not the exception object: warning filters that
            # carry a message pattern match against the message and fail
            # with TypeError on a non-string.
            warnings.warn(str(e), stacklevel=3)
            return None

    @property
    def x_end(self):
        """The ``dist`` attribute of ``tran_line``."""
        return self.tran_line.dist

    # ------------------------------------------------------------------
    # Locations
    # ------------------------------------------------------------------

    @property
    def locs(self):
        """The mapping {distance: Loc}."""
        return self._locs

    @locs.setter
    def locs(self, locs):
        """
        Add one new ``Loc`` per entry of `locs`, keyed by the entry's
        "x" value and filled via ``sm.add_to_obj``.

        Existing locations are not removed.
        """
        for loc_id in locs:
            loc_dist = locs[loc_id]["x"]
            self.locs[loc_dist] = Loc()
            sm.add_to_obj(self.locs[loc_dist], locs[loc_id])

    def _sort_locs(self):
        """
        Sort the locs by distance.
        :return:
        """
        self._locs = OrderedDict(sorted(self._locs.items(),
                                        key=lambda t: t[0]))

    def add_loc(self, x: float, loc):
        """
        Store `loc` at distance `x`, replacing any location already there.

        :param x: distance along the transect; also written to ``loc.x``
        :param loc: the location object
        :return: the stored location
        """
        loc.x = x
        self._locs[x] = loc
        self._sort_locs()
        return self._locs[x]

    def add_loc_by_coords(self, coords, loc):
        """
        Store `loc` at the distance obtained by projecting `coords` onto
        the transect line; also sets ``loc.offset`` and ``loc.off_dir``.

        :raises ValueError: if the start or the end coordinates sum to
            zero (which is how unset end points are detected)
        :return: the stored location
        """
        from liquepy.spatial import map_coords
        if not sum(self.start) or not sum(self.end):
            raise ValueError("start and end coordinates must be set")
        line = self.tran_line
        loc.x = map_coords.calc_proj_line_dist(line, coords)
        loc.offset = map_coords.calc_line_offset(line, coords)
        loc.off_dir = map_coords.calc_line_off_dir(line, coords)
        self._locs[loc.x] = loc
        self._sort_locs()
        return self._locs[loc.x]

    def add_cpt_by_coords(self, cpt, coords, **kwargs):
        """
        Wrap `cpt` in a ``Loc`` (named after ``cpt.file_name``) and add it
        via ``add_loc_by_coords``.

        :param kwargs: optional ``esp``, passed on to ``Loc``
        """
        esp = kwargs.get("esp", None)
        loc = Loc(cpt=cpt, name=cpt.file_name, esp=esp)
        loc.coords = coords
        return self.add_loc_by_coords(coords, loc)

    def add_cpt(self, cpt, x, **kwargs):
        """
        Wrap `cpt` in a ``Loc`` (named after ``cpt.file_name``) and add it
        at distance `x` via ``add_loc``.

        :param kwargs: optional ``offset`` (default None), ``off_dir``
            (default "-") and ``esp`` (default None), passed on to ``Loc``
        """
        loc = Loc(cpt=cpt,
                  name=cpt.file_name,
                  offset=kwargs.get("offset", None),
                  off_dir=kwargs.get("off_dir", "-"),
                  esp=kwargs.get("esp", None))
        return self.add_loc(x, loc)

    def get_cpt_names(self):
        """Return the ``cpt_file_name`` of every location, in stored order."""
        return [loc.cpt_file_name for loc in self.locs.values()]

    def set_ids(self):
        """
        Number the locations 1, 2, ... in stored order; a location's soil
        profile, when not None, receives the same id.
        """
        for new_id, loc in enumerate(self.locs.values(), start=1):
            loc.id = new_id
            if loc.soil_profile is not None:
                loc.soil_profile.id = new_id

    def reset_cpt_folder_paths(self, folder_path):
        """Set ``cpt_folder_path`` of every location to `folder_path`."""
        for loc in self.locs.values():
            loc.cpt_folder_path = folder_path

    def get_loc_by_name(self, name):
        """Return the first location whose ``name`` equals `name`, else None."""
        for loc in self.locs.values():
            if loc.name == name:
                return loc
        return None

    def get_loc_by_dist(self, dist):
        """Return the location stored at distance `dist` (KeyError if absent)."""
        return self.locs[dist]

    def loc(self, index):
        """
        Return the location at the 1-based position `index`.

        :raises KeyError: if `index` is 0
        """
        index = int(index)
        if index == 0:
            raise KeyError("index=%i, but must be 1 or greater." % index)
        return list(self._locs.values())[index - 1]

    def remove_loc(self, loc_int):
        """Delete the location at the 1-based position `loc_int`."""
        key = list(self._locs.keys())[loc_int - 1]
        del self._locs[key]

    def replace_loc(self, loc_int, soil):
        """Put `soil` in place of the location at the 1-based position `loc_int`."""
        key = list(self._locs.keys())[loc_int - 1]
        self._locs[key] = soil

    # ------------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------------

    def to_dict(self, extra=(), **kwargs):
        """
        Return an OrderedDict of the attributes named in ``self.inputs``
        plus `extra`, each passed through ``sf.collect_serial_value``.

        "locs" is left out; ``add_to_dict`` handles the locations.

        :param extra: additional attribute names to include
        :param kwargs: accepted but not used
        """
        outputs = OrderedDict()
        skip_list = ["locs"]
        if hasattr(self, "inputs"):
            full_inputs = list(self.inputs) + list(extra)
        else:
            full_inputs = list(extra)
        for item in full_inputs:
            if item not in skip_list:
                outputs[item] = sf.collect_serial_value(getattr(self, item))
        return outputs

    def add_to_dict(self, models_dict, **kwargs):
        """
        Store this transect's ``to_dict`` output in
        ``models_dict[base_type][unique_hash]`` and let every location add
        itself with that entry as ``parent_dict``.

        :param models_dict: mapping that is modified in place
        :param kwargs: passed on to ``to_dict``
        """
        if self.base_type not in models_dict:
            models_dict[self.base_type] = OrderedDict()
        outputs = self.to_dict(**kwargs)
        models_dict[self.base_type][self.unique_hash] = outputs
        for loc_num in self.locs:
            self.locs[loc_num].add_to_dict(
                models_dict,
                parent_dict=models_dict[self.base_type][self.unique_hash])
예제 #5
0
def create_atlas(animal, create):
    """
    Paste the structure volumes of atlas 'atlasV7' into one uint8 volume
    positioned for `animal`, and optionally write it out for neuroglancer.

    Steps, in order:
      1. Delete and recreate the 'atlas' directory below the animal's
         neuroglancer data directory.
      2. Load every structure's volume and origin file, rotate and flip
         the volume, set all voxels above `surface_threshold` to the
         structure's number and cast to uint8.
      3. Derive a center for every structure from its origin and volume
         shape; fit a rotation and translation (align_point_sets) from
         the centers of the structures that also occur in MD589_centers
         to the scaled MD589 centers, and save both arrays as .npy files
         in the atlas directory.
      4. Transform every structure center, and add every second z-slice
         of the structure volume into the atlas volume at the resulting
         position. A shape mismatch is reported as 'Bad fit' and skipped.
      5. If `create` is truthy, reorient the atlas volume and write it
         with NumpyToNeuroglancer.

    :param animal: identifier handed to FileLocationManager and
        SqlController, and used in directory and file names
    :param create: whether to write the neuroglancer output

    NOTE(review): `start` (used in the final print) is never assigned in
    this function, so it must exist at module level - confirm.
    """

    fileLocationManager = FileLocationManager(animal)
    atlas_name = 'atlasV7'
    THUMBNAIL_DIR = os.path.join(ROOT_DIR, animal, 'preps', 'CH1', 'thumbnail')
    ATLAS_PATH = os.path.join(DATA_PATH, 'atlas_data', atlas_name)
    ORIGIN_PATH = os.path.join(ATLAS_PATH, 'origin')
    VOLUME_PATH = os.path.join(ATLAS_PATH, 'structure')
    OUTPUT_DIR = os.path.join(fileLocationManager.neuroglancer_data, 'atlas')
    # Always start from an empty output directory:
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Sorted, so that zip() below pairs the n-th volume file with the
    # n-th origin file:
    origin_files = sorted(os.listdir(ORIGIN_PATH))
    volume_files = sorted(os.listdir(VOLUME_PATH))
    sqlController = SqlController(animal)
    resolution = sqlController.scan_run.resolution
    surface_threshold = 0.8
    SCALE = (10 / resolution)

    # {structure name: (prepared volume, origin)}
    structure_volume_origin = {}
    for volume_filename, origin_filename in zip(volume_files, origin_files):
        structure = os.path.splitext(volume_filename)[0]
        # The two sorted listings are out of step: report the pair and
        # stop loading (structures loaded so far are still processed).
        if structure not in origin_filename:
            print(structure, origin_filename)
            break

        # The structure number is looked up without the _L/_R side suffix:
        color = get_structure_number(structure.replace('_L', '').replace('_R', ''))

        origin = np.loadtxt(os.path.join(ORIGIN_PATH, origin_filename))
        volume = np.load(os.path.join(VOLUME_PATH, volume_filename))

        volume = np.rot90(volume, axes=(0, 1))
        volume = np.flip(volume, axis=0)
        # Voxels above the threshold get the structure number; the cast
        # to uint8 then truncates whatever is left below it.
        volume[volume > surface_threshold] = color
        volume = volume.astype(np.uint8)

        structure_volume_origin[structure] = (volume, origin)

    # Atlas volume dimensions: scan width/height divided by SCALE, and one
    # z-slice per entry of the thumbnail directory.
    col_length = sqlController.scan_run.width/SCALE
    row_length = sqlController.scan_run.height/SCALE
    z_length = len(os.listdir(THUMBNAIL_DIR))
    atlasV7_volume = np.zeros(( int(row_length), int(col_length), z_length), dtype=np.uint8)
    print('atlas volume shape', atlasV7_volume.shape)

    ##### actual data for both sets of points, pixel coordinates
    centers = OrderedDict(MD589_centers)
    centers_list = []
    # Swap the first two components and divide them by SCALE; the third
    # component is kept unchanged.
    for value in centers.values():
        centers_list.append((value[1]/SCALE, value[0]/SCALE, value[2]))
    COM = np.array(centers_list)
    atlas_com_centers = OrderedDict()
    atlas_all_centers = {}
    for structure, (volume, origin) in sorted(structure_volume_origin.items()):
        midcol, midrow, midz = origin
        # Shift the origin by half the atlas extent; z origin and z extent
        # of the structure are halved.
        row_start = midrow + row_length / 2
        col_start = midcol + col_length / 2
        z_start = midz / 2 + z_length / 2
        row_end = row_start + volume.shape[0]
        col_end = col_start + volume.shape[1]
        z_end = z_start + (volume.shape[2] + 1) / 2
        # Center = midpoint between start and end on each axis:
        midcol = (col_end + col_start) / 2
        midrow = (row_end + row_start) / 2
        midz = (z_end + z_start) / 2
        # Only structures that have a reference center take part in the fit:
        if structure in centers.keys():
            atlas_com_centers[structure] = [midrow, midcol, midz]
        atlas_all_centers[structure] = [midrow, midcol, midz]
    ATLAS_centers = OrderedDict(atlas_com_centers)
    ATLAS = np.array(list(ATLAS_centers.values()))
    #### both sets of data are scaled to stack of DK52
    pprint(COM)
    pprint(ATLAS)
    #####Transform to auto align
    # NOTE(review): ATLAS rows follow the sorted structure names while COM
    # rows follow the order of MD589_centers; the fit pairs them row by
    # row, so both orders must agree - confirm.
    r_auto, t_auto = align_point_sets(ATLAS.T, COM.T)

    rotationpath = os.path.join(ATLAS_PATH, f'atlas2{animal}.rotation.npy')
    np.save(rotationpath, r_auto)
    translatepath = os.path.join(ATLAS_PATH, f'atlas2{animal}.translation.npy')
    np.save(translatepath, t_auto)


    # Litao, look at the start and end for these structures, the x and y look good
    # but the z (section) is off
    debug = True
    for structure, (volume, origin) in sorted(structure_volume_origin.items()):
        print(str(structure).ljust(7),end=": ")

        source_point = np.array(atlas_all_centers[structure]) # get adjusted x,y,z from above loop
        results = (r_auto @ source_point + t_auto.T).reshape(1,3) # transform to fit
        x = results[0][1] # new x
        y = results[0][0] # new y
        z = results[0][2] # z
        # From the transformed center back to the start corner:
        x = x - volume.shape[0]/2
        y = y - volume.shape[1]/2
        x_start = int( round(x))
        y_start = int( round(y))
        z_start = int(z - volume.shape[2]/4)

        x_end = int( round(x_start + volume.shape[0]))
        y_end = int( round(y_start + volume.shape[1]))
        # (shape[2] + 1) // 2 is the number of even z indices kept below:
        z_end = int( round(z_start + (volume.shape[2] + 1) // 2))

        if debug:
            #print('volume shape', volume.shape, end=" ")
            print('COM row',
                  str(int(y)).rjust(4),
                  'mid col',
                  str(int(x)).rjust(4),
                  'mid z',
                  str(int(z)).rjust(4),
                  end=" ")
            print('Row range',
                  str(y_start).rjust(4),
                  str(y_end).rjust(4),
                  'col range',
                  str(x_start).rjust(4),
                  str(x_end).rjust(4),
                  'z range',
                  str(z_start).rjust(4),
                  str(z_end).rjust(4),
                  end=" ")

        # For structures with a reference center, print the difference
        # between the transformed position and that reference:
        if structure in centers.keys():
            xo,yo,zo = MD589_centers[structure]
            print('COM off by:',
                  round(x*SCALE - xo, 2),
                  round(y*SCALE - yo, 2),
                  round(z - zo, 2),
                  end=" ")

        # Keep every second z-slice, then swap the first two axes so the
        # volume matches the [y, x, z] slice it is added to:
        z_indices = [z for z in range(volume.shape[2]) if z % 2 == 0]
        volume = volume[:, :, z_indices]
        volume = np.swapaxes(volume, 0, 1)
        try:
            atlasV7_volume[y_start:y_end, x_start:x_end, z_start:z_end] += volume
        except ValueError as ve:
            # Target slice and volume differ in shape; the structure is
            # left out of the atlas volume.
            print('Bad fit', end=" ")

        print()

    resolution = int(resolution * 1000 * SCALE)
    print('Shape of downsampled atlas volume', atlasV7_volume.shape)

    print('Resolution at', resolution)

    if create:
        # Reorient the assembled volume before export:
        atlasV7_volume = np.rot90(atlasV7_volume, axes=(0, 1))
        atlasV7_volume = np.fliplr(atlasV7_volume)
        atlasV7_volume = np.flipud(atlasV7_volume)
        atlasV7_volume = np.fliplr(atlasV7_volume)

        offset = [0,0,0]
        ng = NumpyToNeuroglancer(atlasV7_volume, [resolution, resolution, 20000], offset=offset)
        ng.init_precomputed(OUTPUT_DIR)
        ng.add_segment_properties(get_segment_properties())
        ng.add_downsampled_volumes()
        ng.add_segmentation_mesh()

        #outpath = os.path.join(ATLAS_PATH, f'{atlas_name}.tif')
        #io.imsave(outpath, atlasV7_volume.astype(np.uint8))
    end = timer()
    print(f'Finito! Program took {end - start} seconds')
예제 #6
0
        if "PARstart" in x_chr_dict[entry]:
            PAR_boolean_windows = (
                df_dict["onlyX"][entry]["WINDOW"] * args.window_step >=
                x_chr_dict[entry]["PARstart"]) & (
                    df_dict["onlyX"][entry]["WINDOW"] * args.window_step +
                    args.window_size <= x_chr_dict[entry]["PARend"])
            df_dict["noPAR"][entry] = df_dict["onlyX"][entry][
                ~PAR_boolean_windows]
            df_dict["PAR"][entry] = df_dict["onlyX"][entry][
                PAR_boolean_windows]
            df_number_dict[entry] += 2
    #if x_chr_dict:
    #    df_dict["noX"][entry]["density"] = df_dict["noX"][entry]["All"] * args.multiplier / args.window_size
    #    df_dict["onlyX"][entry]["density"] = df_dict["onlyX"][entry]["All"] * args.multiplier / args.window_size

# The per-entry counts collected in df_number_dict, as a list:
df_number_list = list(df_number_dict.values())

# Figure size in inches: fixed height; the width is half the sum of the
# counts (truncated to an integer), but never less than 1.
figure_height = 4
figure_width = max(1, int(sum(df_number_list) / 2))
dpi = 300
# A single axes object on a figure with a white background:
fig, ax = plt.subplots(nrows=1,
                       ncols=1,
                       figsize=(figure_width, figure_height),
                       dpi=dpi)
fig.patch.set_color('white')

# Accumulators that start empty here:
data_list = []
label_list = []
x_pos_list = []

inner_distance = 1
예제 #7
0
class CourseSummaryStats(object):
    '''
    Holds the summary statistics of all offerings of
    a single class. Included: average percentages of responses
    across all offerings of one course. So there will be
    a single instance for each course name. 
    
    Also included: a list of termcore numbers when the
    course was offered.
    
    '''

    # Difficulty levels covered by the summary:
    _DIFFICULTY_LEVELS = range(1, 9)

    #--------------------------------
    # Constructor CourseSummaryStats
    #------------------

    def __init__(self, course_stats_obj_list):
        '''
        Average the per-offering difficulty percentages of one course.

        @param course_stats_obj_list: non-empty list with one stats
            object per offering. Each must support obj['crse_code'],
            obj['termcore'] and obj.percent_by_difficulty(<int>).
        @type course_stats_obj_list: [CourseStats]
        @raise IndexError: if the list is empty
        '''

        self.course_name = course_stats_obj_list[0]['crse_code']

        num_offerings = len(course_stats_obj_list)

        # For each difficulty level: sum the percentage over all
        # offerings and divide by the number of offerings. The
        # dict is built from (key, value) pairs in level order, so
        # the order level 1 ... level 8 does not depend on how a
        # dict literal happens to be ordered:
        self.summary_dict = OrderedDict(
            (self._interval_for_level(level),
             sum(course_stat_obj.percent_by_difficulty(level)
                 for course_stat_obj in course_stats_obj_list) / num_offerings)
            for level in self._DIFFICULTY_LEVELS)

        self.num_offerings = num_offerings
        # List of termcores when course was offered
        self.termcores = [
            stats_obj['termcore'] for stats_obj in course_stats_obj_list
        ]

    #--------------------------------
    # _interval_for_level
    #------------------

    @staticmethod
    def _interval_for_level(level):
        '''
        Return CourseStats.DIFF_LEVEL<level>, the key under which
        the summary for integer difficulty `level` is stored.
        '''
        return getattr(CourseStats, 'DIFF_LEVEL%d' % level)

    #--------------------------------
    # values
    #------------------

    def values(self):
        '''
        Return an iterator over summary percentage of each
        difficulty level. Order is guaranteed to be from 
        level 1 to level 8.
        '''
        return self.summary_dict.values()

    #--------------------------------
    # __getitem__
    #------------------

    def __getitem__(self, key):
        '''
        If key is a DIFF_LEVEL<n> instance, return that 
        difficult level's summary response percentage.
        Else key must be one of 
          o num_offerings
          o termcores
          o course_name
        The non-difficulty keys would likely be better
        done with properties. But this is very clear:

        @raise KeyError: for any other key
        '''
        if isinstance(key, Interval):
            return self.summary_dict[key]
        elif key == 'num_offerings':
            return self.num_offerings
        elif key == 'termcores':
            return self.termcores
        elif key == 'course_name':
            return self.course_name
        else:
            raise KeyError("Key '%s' not in dict." % key)

    #--------------------------------
    # percent_by_difficulty
    #------------------

    def percent_by_difficulty(self, difficulty_level):
        '''
        Given either an integer difficulty level between 1 and 8,
        or one of the instances CourseStats.DIFF_LEVEL<n>, return
        this course's average percentage across all offerings of 
        responses that lie in that difficulty range.

        Any other argument yields None.
        
        @param difficulty_level: indicator of which difficulty level is wanted
        @type difficulty_level: {int | Interval}
        '''

        for level in self._DIFFICULTY_LEVELS:
            interval = self._interval_for_level(level)
            if difficulty_level == level or difficulty_level == interval:
                # summary_dict is keyed by the DIFF_LEVEL<n>
                # instances (see __init__), not by strings such
                # as 'diff_level1_perc':
                return self.summary_dict[interval]
        return None
def contar_palabras(bs, link):
    """
    Count the words of a page and store the most frequent ones.

    The page text is lower-cased, stripped of every character that is
    neither a word character nor whitespace, tokenized, and filtered
    against the Spanish stop-word list. The (up to) three most frequent
    remaining words are written to the `enlaces` document whose
    'direccion' equals `link`, as 'palabra1'..'palabra3' together with
    their counts as 'ranking1'..'ranking3'.

    Any error is reported with a printed message; nothing is raised.

    :param bs: parsed HTML object offering ``getText()``
    :param link: address of the page; selects the document to update
    """
    num = 3  # number of top words to store

    try:
        # HTML -> lower-case text without quotes, parentheses, etc.
        texto = bs.getText().lower()
        result = re.sub(r'[^\w\s]', '', texto)

        stop_words = set(stopwords.words('spanish'))
        word_tokens = word_tokenize(result)

        # Drop punctuation tokens and stop words.
        filtro = [
            palabra for palabra in word_tokens
            if palabra not in string.punctuation
            and palabra not in stop_words
        ]

        # Most frequent first; shorter than `num` when the page has
        # fewer distinct words.
        mas_comunes = Counter(filtro).most_common(num)

        campos = {}
        for posicion, (palabra, repeticiones) in enumerate(mas_comunes,
                                                           start=1):
            campos['palabra%d' % posicion] = palabra
            campos['ranking%d' % posicion] = repeticiones

        # One write for all fields instead of one round trip per field;
        # skipped when there is nothing to store.
        if campos:
            enlaces.update_one({'direccion': link}, {'$set': campos})
    except Exception:
        # Not a bare `except:` - that would also swallow
        # KeyboardInterrupt and SystemExit.
        print('Ha ocurrido un error con las palabras')
##### actual data for both sets of points
# Structure name -> three numbers for MD589. The first two are divided by
# SCALE below, the third is used as is.
MD589_centers = {
    '5N_L': [23790, 13025, 160],
    '5N_R': [20805, 14163, 298],
    '7n_L': [20988, 18405, 177],
    '7n_R': [24554, 13911, 284],
    'DC_L': [24482, 11985, 134],
    'DC_R': [20424, 11736, 330],
    'LC_L': [25290, 11750, 180],
    'LC_R': [24894, 12079, 268],
    'SC': [24226, 6401, 220]
}
MD589_centers = OrderedDict(MD589_centers)
# One row per structure: first two components swapped and divided by SCALE
# (SCALE is not assigned in the visible code), third component unchanged.
MD589_list = []
for value in MD589_centers.values():
    MD589_list.append((value[1] / SCALE, value[0] / SCALE, value[2]))
MD589 = np.array(MD589_list)

# Structure name -> three numbers for the atlas, with the same keys as
# MD589_centers.
atlas_centers = {
    '5N_L': [686.53, 990.08, 155.38],
    '5N_R': [686.53, 990.08, 292.62],
    '7n_L': [725.04, 1034.44, 172.21],
    '7n_R': [725.04, 1034.44, 275.79],
    'DC_L': [806.29, 955.16, 130.35],
    'DC_R': [806.29, 955.16, 317.65],
    'LC_L': [731.55, 934.49, 182.33],
    'LC_R': [731.55, 934.49, 265.67],
    'SC': [602.87, 757.7, 225.5],
}
예제 #10
0
    def _report_textual_results(self, tally_coll, res_dir):
        '''
        Pool the predictions and labels of all testing-phase
        tallies, and compute information retrieval type
        measures over the pooled lists:

             precision (macro/micro/weighted/by-class)
             recall    (macro/micro/weighted/by-class)
             f1        (macro/micro/weighted/by-class)
             accuracy
             balanced_accuracy

        The measures are combined into a Pandas series, and
        written to the file 'ir_results.csv' in res_dir.

        The measures, rounded to one digit, are also turned
        into three Github flavored tables, which are posted
        to the 'text' tab of tensorboard.

        :param tally_coll: collection of tallies from batches
        :type tally_coll: ResultCollection
        :param res_dir: directory where all .csv and other
            result files are to be written
        :type res_dir: str
        :return results of information retrieval-like measures
        :rtype: pandas.Series
        '''

        score_families = (('prec', precision_score),
                          ('recall', recall_score),
                          ('f1', f1_score))
        # (key suffix, value for the 'average' argument):
        averagings = (('macro', 'macro'),
                      ('micro', 'micro'),
                      ('weighted', 'weighted'),
                      ('by_class', None))

        preds_pool = []
        labels_pool = []
        for tally in tally_coll.tallies(phase=LearningPhase.TESTING):
            preds_pool.extend(tally.preds)
            labels_pool.extend(tally.labels)

        # Keys are added family by family (prec_*, recall_*, f1_*),
        # then the two accuracies; this is the row order of the csv:
        res = OrderedDict({})
        for family, scorer in score_families:
            for suffix, averaging in averagings:
                res[family + '_' + suffix] = scorer(labels_pool,
                                                    preds_pool,
                                                    average=averaging,
                                                    zero_division=0)
        res['accuracy'] = accuracy_score(labels_pool, preds_pool)
        res['balanced_accuracy'] = balanced_accuracy_score(
            labels_pool, preds_pool)

        res_series = pd.Series(list(res.values()), index=list(res.keys()))

        # The series serves as convenient intermediary
        # for writing the measures to .csv:
        res_series.to_csv(os.path.join(res_dir, 'ir_results.csv'))

        # One-digit rounding for display. Plain floats go through
        # the builtin; everything else (e.g. the per-class arrays)
        # through its own round() method:
        res_rnd = {}
        for meas_nm, meas_val in res.items():
            if type(meas_val) == float:
                res_rnd[meas_nm] = round(meas_val, 1)
            else:
                res_rnd[meas_nm] = meas_val.round(1)

        families = [family for family, _scorer in score_families]
        measure_names = ['precision', 'recall', 'f1']

        # Table 1: one row per averaging method, one column per measure:
        ir_measures_skel = {
            'col_header': list(measure_names),
            'row_labels': ['macro', 'micro', 'weighted'],
            'rows': [[res_rnd[family + '_' + averaging]
                      for family in families]
                     for averaging in ('macro', 'micro', 'weighted')]
        }

        # Table 2: one row per class, one column per measure:
        ir_by_class_skel = {
            'col_header': list(measure_names),
            'row_labels': self.class_names,
            'rows': [list(class_triplet)
                     for class_triplet in zip(res_rnd['prec_by_class'],
                                              res_rnd['recall_by_class'],
                                              res_rnd['f1_by_class'])]
        }

        # Table 3: the two accuracies in a single row:
        accuracy_skel = {
            'col_header': ['accuracy', 'balanced_accuracy'],
            'row_labels': ['Overall'],
            'rows': [[res_rnd['accuracy'], res_rnd['balanced_accuracy']]]
        }

        ir_measures_tbl = GithubTableMaker.make_table(ir_measures_skel,
                                                      sep_lines=False)
        ir_by_class_tbl = GithubTableMaker.make_table(ir_by_class_skel,
                                                      sep_lines=False)
        accuracy_tbl = GithubTableMaker.make_table(accuracy_skel,
                                                   sep_lines=False)

        # Post the markup tables to Tensorboard:
        for tab_title, table in (('Information retrieval measures',
                                  ir_measures_tbl),
                                 ('Per class measures', ir_by_class_tbl),
                                 ('Accuracy', accuracy_tbl)):
            self.writer.add_text(tab_title, table, global_step=0)

        return res_series
예제 #11
0
class Experiment(Observable):
    """
    This class represents experiment.

    It bundles the data set, per-attribute settings, the label attribute,
    slots with features selectors and classifiers, the evaluation method
    and the results, and it can be saved to / loaded from a file.
    """

    DEFAULT_FEATURE_EXTRACTOR_NAME = "Pass"
    """Name of default feature extractor that is set to attribute.
    If exists."""
    class AttributeSettings(Enum):
        """
        Possible settings types that could be set to an attribute.
        """
        USE = 0
        PATH = 1
        FEATURE_EXTRACTOR = 2
        LABEL = 3

    def __init__(self, filePath: str = None):
        """
        Creation of new experiment or loading of saved.
        
        :param filePath: Path to file. If None than new experiment is created, else
            saved experiment is loaded.
        :type filePath: str| None
        :raise RuntimeError: When there is a problem with plugins.
        :raise ExperimentLoadException: When there is a problem with loading.
        """
        super().__init__()

        self._dataset = None
        self._attributesSet = {}
        self._label = None
        self._featuresSele = []
        self._classifiers = []  # classifiers for testing
        self._evaluationMethod = None
        self.loadSavePath = None  # stores path from which this exp was loaded or where is saved
        self.results = None

        # Not part of the saved file, so they always start empty.
        self._dataStats = None
        self._origDataStats = None
        self._attributesThatShouldBeUsedCache = {}

        # let's load the plugins that are now available
        # must be called before experiment loading
        # because sets default values
        self._loadPlugins()

        if filePath is not None:
            # load saved experiment
            self._load(filePath)
            self.loadSavePath = filePath

    def save(self, filePath):
        """
        Saves experiment configuration to given file.
        
        :param filePath: Path to experiment file.
        :type filePath: str
        """
        # let's create Experiment version for saving
        data = {
            "dataSet": self._dataset,
            "attributesSet": self._attributesSet,
            "label": self._label,
            "featuresSele": self._featuresSele,
            "classifiers": self._classifiers,
            "evaluationMethod": self._evaluationMethod,
            "results": self.results
        }
        with open(filePath, "wb") as saveF:
            # save it
            pickle.dump(data, saveF)

        # Only reached when the dump succeeded and the file is closed.
        self.loadSavePath = filePath
        LastUsedExperiments().used(filePath)

    def setResults(self, r):
        """
        Sets results. Suitable for use as callback.
        
        :param r: new results.
        :type r: Results
        """
        self.results = r

    @staticmethod
    def _isSlotBank(bank, pluginType):
        """
        Checks that loaded object can be used as a slot bank.
        
        :param bank: Loaded object.
        :param pluginType: Plugin class that is allowed as a bare member.
        :type pluginType: type
        :return: True when bank is a list whose members are all plugin slots
            (what _addPluginSlot stores) or plugins of given type.
        :rtype: bool
        """
        return isinstance(bank, list) and \
            all(isinstance(s, (PluginSlot, pluginType)) for s in bank)

    def _load(self, filePath):
        """
        Loads saved experiment configuration from given file.
        Nothing is assigned to this experiment until the whole content
        of the file passes validation.
        
        :param filePath: Path to experiment file.
        :type filePath: str
        :raise ExperimentLoadException: When there is a problem with loading.
        """
        errMsg = "Couldn't load given experiment."

        with open(filePath, "rb") as loadF:
            try:
                # NOTE(security): unpickling can execute arbitrary code,
                # so only experiment files from trusted sources should be opened.
                lE = pickle.load(loadF)
            except Exception as e:
                raise ExperimentLoadException(errMsg) from e

        if not isinstance(lE, dict):
            raise ExperimentLoadException(errMsg)

        # check that we have loaded all attributes
        for a in ("dataSet", "attributesSet", "label",
                  "featuresSele", "classifiers", "evaluationMethod"):
            if a not in lE:
                raise ExperimentLoadException(errMsg)

        # "results" is not among the required keys, so a file without it
        # is treated as experiment without results.
        results = lE.get("results")

        valid = isinstance(lE["dataSet"], DataSet) \
            and isinstance(lE["attributesSet"], dict) \
            and (lE["label"] is None or isinstance(lE["label"], str)) \
            and self._isSlotBank(lE["featuresSele"], FeaturesSelector) \
            and self._isSlotBank(lE["classifiers"], Classifier) \
            and isinstance(lE["evaluationMethod"], Validator) \
            and (results is None or isinstance(results, Results))

        if not valid:
            raise ExperimentLoadException(errMsg)

        self._dataset = lE["dataSet"]
        self._attributesSet = lE["attributesSet"]
        self._label = lE["label"]
        self._featuresSele = lE["featuresSele"]
        self._classifiers = lE["classifiers"]
        self._evaluationMethod = lE["evaluationMethod"]
        self.results = results

        LastUsedExperiments().used(filePath)

    def useDataSubset(self):
        """
        Use only defined subset of data.
        Subset is defined by selected samples.
        Samples are selected according to constraints defined in dataStats.
        """
        self._dataset.useSubset(None)  # clear the old one
        if self._dataStats is None:
            return

        subset = np.empty(self._dataStats.numberOfSamples)
        # remaining number of samples that we still want from each class
        counters = copy.copy(self._dataStats.classSamples)

        cnt = 0
        for i, sample in enumerate(self._dataset):
            l = sample[self._label]
            try:
                if counters[l] > 0:
                    counters[l] -= 1
                    subset[cnt] = i
                    cnt += 1
            except KeyError:
                # probably class that we want to omit
                pass

        # np.empty does not initialise its content, so only the part
        # that was really filled may be passed on.
        self._dataset.useSubset(subset[:cnt])

    @property
    def dataStats(self):
        """
        The data stats. Working copy of original data stats.
        
        :return: Actual stats.
        :rtype: ExperimentDataStatistics | None
        """
        return self._dataStats

    @property
    def origDataStats(self):
        """
        Original data stats. Maybe you are looking for working copy 
        of data stats that you can get with dataStats.
        
        :return: Original data stats.
        :rtype: ExperimentDataStatistics | None
        """
        return self._origDataStats

    @Observable._event("NEW_DATA_STATS")
    def setDataStats(self, stats, actOnly=False):
        """
        Set the data stats. This method overrides working copy
        and original data stats.
        
        :param stats: New stats. None clears the stats.
        :type stats: ExperimentDataStatistics | None
        :param actOnly: If true than overrides only working copy.
            If false than overrides original data to.
            If no original data was set (origData is None) than
            this parameter is ignored and origData is set too.
        :type actOnly: bool
        """

        self._dataStats = copy.deepcopy(stats)
        if self._origDataStats is None or not actOnly:
            self._origDataStats = stats
            return

        if self._dataStats is None:
            # working copy was cleared, there is nothing to complete
            return

        # We must add classes that were filtered out.
        classSamples = self._dataStats.classSamples
        deactivate = []
        for c in self._origDataStats.classes:
            if c not in classSamples:
                # we set the max, but we must deactivate it
                # The max is set because if user will decide
                # that she/he wants to use this class, than
                # we must set some initial number of samples.
                classSamples[c] = self._origDataStats.classSamples[c]
                deactivate.append(c)

        self._dataStats.classSamples = classSamples
        # lets deactivate it
        for c in deactivate:
            self._dataStats.deactivateClass(c)

    def _loadPlugins(self):
        """
        Loads available plugins.
        Adds default.
        
        :raise RuntimeError: When there is problem with plugins.
        """
        # available features extractors
        if len(FEATURE_EXTRACTORS) == 0:
            raise RuntimeError("There are no features extractors plugins.")

        feTmp = {}
        for fe in FEATURE_EXTRACTORS.values():
            feName = fe.getName()
            if feName in feTmp:
                # wow, name collision
                raise RuntimeError(
                    "Collision of features extractors names. For name: " +
                    feName)
            feTmp[feName] = fe

        # lets put the default feature extractor as the first if exists
        if self.DEFAULT_FEATURE_EXTRACTOR_NAME in feTmp:
            cont = [(self.DEFAULT_FEATURE_EXTRACTOR_NAME,
                     feTmp[self.DEFAULT_FEATURE_EXTRACTOR_NAME])]
            # add the rest
            cont += [(n, p) for n, p in feTmp.items()
                     if n != self.DEFAULT_FEATURE_EXTRACTOR_NAME]
            self._featuresExt = OrderedDict(cont)
        else:
            self._featuresExt = OrderedDict(feTmp)

        # available classifiers
        if len(CLASSIFIERS) == 0:
            raise RuntimeError("There are no classifiers plugins.")

        # the names are collected only for the collision check
        clsTmp = set()
        for cls in CLASSIFIERS.values():
            clsName = cls.getName()
            if clsName in clsTmp:
                # wow, name collision
                raise RuntimeError(
                    "Collision of classifiers names. For name: " + clsName)
            clsTmp.add(clsName)

        # available Validators
        self.availableEvaluationMethods = getAllSubclasses(Validator)
        if len(self.availableEvaluationMethods) == 0:
            # without this check the default below fails with IndexError
            raise RuntimeError("There are no validators plugins.")

        self._evaluationMethod = self.availableEvaluationMethods[0](
        )  # add default

        # available Features selectors
        self.availableFeatureSelectors = getAllSubclasses(FeaturesSelector)

    @property
    def featuresSelectors(self):
        """
        Features selectors for feature selecting.
        """

        return [s.plugin for s in self._featuresSele]

    @property
    def featuresSelectorsSlots(self):
        """
        All used features selectors slots.
        """

        return self._featuresSele

    @property
    def classifiersSlots(self):
        """
        All currently used classifiers slots.
        """
        return self._classifiers

    @property
    def classifiers(self):
        """
        Classifiers for testing.
        """

        return [s.plugin for s in self._classifiers]

    def newClassifierSlot(self):
        """
        Creates new slot for classifier that should be tested.
        
        :return: Classifier slot
        :rtype: PluginSlot
        """
        return self._addPluginSlot(self._classifiers)

    def removeClassifierSlot(self, slot: PluginSlot):
        """
        Remove classifier slot.
        
        :param slot: Slot for classifier.
        :type slot: PluginSlot
        """
        self._removePluginSlot(self._classifiers, slot)

    def newFeaturesSelectorSlot(self):
        """
        Creates new slot for features selector that should be tested.
        
        :return: Features selector slot
        :rtype: PluginSlot
        """
        return self._addPluginSlot(self._featuresSele)

    def removeFeaturesSelectorSlot(self, slot: PluginSlot):
        """
        Remove features selector slot.
        
        :param slot: Slot for features selector.
        :type slot: PluginSlot
        """
        self._removePluginSlot(self._featuresSele, slot)

    def _addPluginSlot(self, bank):
        """
        Creates new slot in given slot bank.
        
        :param bank: Slot bank
        :type bank: List[PluginSlot]
        :return: New slot
        :rtype: PluginSlot
        """
        # New id is one above the highest used id, so ids of removed
        # slots below the maximum are not reused.
        slotId = 0 if len(bank) == 0 else max(p.id for p in bank) + 1
        bank.append(PluginSlot(slotId))
        return bank[-1]

    def _removePluginSlot(self, bank: List[PluginSlot], slot: PluginSlot):
        """
        Removes slot from given slot bank.
        
        :param bank: Slot bank
        :type bank: List[PluginSlot]
        :param slot: Slot that should be removed.
        :type slot: PluginSlot
        :raise ValueError: When the slot is not in the bank.
        """
        bank.remove(slot)

    @property
    def availableClassifiers(self):
        """
        Available classifiers plugins.
        """
        return CLASSIFIERS

    @property
    def featuresExt(self):
        """
        Available features extractors plugins.
        Stored in OrderedDict (name -> plugin). Because it is handy to have default extractor as first
        (if exists). 
        """
        return self._featuresExt

    @Observable._event("NEW_DATA_SET")
    def loadDataset(self, filePath: str):
        """
        Loads dataset and resets attribute settings, label and data stats.
        
        :param filePath: Path to file with dataset.
        :type filePath: str
        """
        self._dataset = DataSet(filePath)

        # The first extractor is the default one when the default exists.
        defaultExtractor = next(iter(self._featuresExt.values()))

        # prepare new attribute settings
        # every attribute gets its own extractor instance
        self._attributesSet = {
            name: {
                self.AttributeSettings.USE: True,
                self.AttributeSettings.PATH: False,
                self.AttributeSettings.FEATURE_EXTRACTOR: defaultExtractor()
            }
            for name in self._dataset.attributes
        }
        self._label = None
        self._dataStats = None
        self._attributesThatShouldBeUsedCache = {}

    @property
    def evaluationMethod(self):
        """
        Validator used for evaluation.
        """
        return self._evaluationMethod

    @evaluationMethod.setter
    def evaluationMethod(self, val):
        """
        Validator used for evaluation.
        
        :param val: Validator or name of validator class.
            If name of validator is provided than new object of it's corresponding class is created.
        :type val:str|Validator
        :raise ValueError: When invalid value is given (unknown name).
        """
        if isinstance(val, Validator):
            self._evaluationMethod = val
            return

        # self.availableEvaluationMethods is a list because we want to preserve order and therefore
        # we have no other choice than to iterate over it and find the right by name.
        for v in self.availableEvaluationMethods:
            if v.getName() == val:
                self._evaluationMethod = v()
                return

        # str() keeps the documented ValueError even for non string values
        raise ValueError("Unknown Validator name: " + str(val))

    def setEvaluationMethod(self, val):
        """
        Same as evaluationMethod but can be used as callable
        
        :param val: Validator or name of validator class.
            If name of validator is provided than new object of it's corresponding class is created.
        :type val:str|Validator
        :raise ValueError: When invalid value is given (unknown name).
        """
        self.evaluationMethod = val

    @property
    def label(self):
        """
        Attribute name that is set as label or None.
        """
        return self._label

    def getAttributeSetting(self, attribute: str, t):
        """
        Get attribute setting of given type.
        
        :param attribute: The attribute.
        :type attribute: str
        :param t: The setting type.
        :type t: Experiment.AttributeSettings
        :return: The setting value. For LABEL it is True when the attribute
            is the current label.
        :raise KeyError: When the name of attribute is unknown
            (not for LABEL setting).
        """
        if t == Experiment.AttributeSettings.LABEL:
            # label is stored once for whole experiment, not per attribute
            return self._label == attribute

        return self._attributesSet[attribute][t]

    @Observable._event("ATTRIBUTES_CHANGED")
    def attributesChangedEvent(self):
        """
        This event exists for informing observers that some attribute is no longer used
        or started to be used or when attribute is marked as label.
        
        """
        pass

    def setAttributeSetting(self, attribute: str, t, val):
        """
        Set attribute setting of given type.
        
        :param attribute: The attribute.
        :type attribute: str
        :param t: The setting type.
        :type t: Experiment.AttributeSettings
        :param val: New value. For setting new label val must be true, because if you pass false than
        label will be set to None.
        :type val: bool | Plugin
        :raise KeyError: When the name of attribute is unknown.
        """
        settings = Experiment.AttributeSettings

        if t == settings.LABEL:
            self._label = attribute if val else None
            # setting new label invalidates data stats
            self.setDataStats(None)
        else:
            self._attributesSet[attribute][t] = val

        if t == settings.PATH:
            # we must inform the data set object
            if val:
                extractor = self._attributesSet[attribute][
                    settings.FEATURE_EXTRACTOR]
                self._dataset.addPathAttribute(attribute,
                                               extractor.expDataType())
            else:
                self._dataset.removePathAttribute(attribute)

        if t == settings.FEATURE_EXTRACTOR and \
                attribute in self._dataset.pathAttributes:
            # we must inform the data set object
            self._dataset.addPathAttribute(attribute, val.expDataType())

        if t == settings.USE or t == settings.LABEL:
            # set of used attributes changed
            self._attributesThatShouldBeUsedCache = {}
            self.attributesChangedEvent()

    def attributesThatShouldBeUsed(self, label: bool = True):
        """
        Names of attributes that should be used.
        The result is cached until USE or LABEL setting changes
        or new data set is loaded.
        
        :param label: True means that label attribute should be among them.
        :type label: bool
        :return: Names of used attributes in original order.
        :rtype: List[str]
        """
        # we are preserving original attribute order
        try:
            return self._attributesThatShouldBeUsedCache[label]
        except KeyError:
            res = [a for a in self._dataset.attributes
                   if self._attributesSet[a][Experiment.AttributeSettings.USE] and (label or a != self._label)]

            self._attributesThatShouldBeUsedCache[label] = res

            return res

    @property
    def dataset(self):
        """
        Loaded dataset.
        """
        return self._dataset
예제 #12
0
def get_more_stats(user):
    """
    Build genre, release-period and episode/runtime statistics for the
    Series, Anime and Movies lists of the given user.

    :param user: object whose ``id`` selects the rows of the user's lists.
    :return: dict with the keys ``Series_genres``, ``Series_periods``,
        ``Series_episodes``, ``Anime_genres``, ``Anime_periods``,
        ``Anime_episodes``, ``Movies_genres``, ``Movies_periods`` and
        ``Movies_times``. A ``*_genres`` value is a list of
        ``(genre, time_watched)`` tuples sorted by time, descending; the
        other values are ``OrderedDict`` objects mapping a bucket label to
        the number of media in it. Any value whose counters are all zero is
        replaced by an empty dict.
    """
    # Each table row is (exclusive upper bound, label); None means "no upper
    # bound". Bounds are exclusive so that e.g. 19 episodes falls in '1-19'.
    period_buckets = (
        (1970, '1960-1969'),
        (1980, '1970-1979'),
        (1990, '1980-1989'),
        (2000, '1990-1999'),
        (2010, '2000-2009'),
        (2020, '2010-2019'),
        (None, '2020+'),
    )
    episode_buckets = (
        (20, '1-19'),
        (50, '20-49'),
        (100, '50-99'),
        (150, '100-149'),
        (200, '150-199'),
        (300, '200-299'),
        (400, '300-399'),
        (500, '400-499'),
        (None, '500+'),
    )
    # presumably minutes, given the hour-based labels -- confirm against the
    # Movies.runtime column
    runtime_buckets = (
        (60, '<1h'),
        (90, '1h-1h29'),
        (120, '1h30-1h59'),
        (150, '2h00-2h29'),
        (180, '2h30-2h59'),
        (None, '3h+'),
    )

    def new_counters(buckets):
        # One zeroed counter per label, in table order.
        return OrderedDict((label, 0) for _, label in buckets)

    def count_in_bucket(counters, buckets, value):
        # Increment the first bucket whose upper bound lies above value.
        for upper, label in buckets:
            if upper is None or value < upper:
                counters[label] += 1
                return

    def year_of(date_string):
        # Leading 'YYYY' of a 'YYYY-...' string as an int.
        return int(date_string.split('-')[0])

    def get_episodes_and_time(element):
        # Returns [episodes watched, time watched] for one (media, list
        # entry) pair.
        media, entry = element[0], element[1]
        status = entry.status

        if status == Status.COMPLETED or status == Status.COMPLETED_ANIMATION:
            try:
                # Only media with a 'runtime' attribute take this path
                # (presumably movies); they count as a single episode.
                return [1, media.runtime]
            except AttributeError:
                return [
                    media.total_episodes,
                    int(media.episode_duration) * media.total_episodes
                ]

        if status != Status.PLAN_TO_WATCH and status != Status.RANDOM:
            nb_episodes = [m.episodes for m in media.eps_per_season]
            ep_duration = int(media.episode_duration)

            # Every season before the current one is counted in full.
            ep_counter = 0
            for i in range(0, entry.current_season - 1):
                ep_counter += int(nb_episodes[i])

            episodes_watched = ep_counter + entry.last_episode_watched
            return [episodes_watched, ep_duration * episodes_watched]

        return [0, 0]

    series_data = db.session.query(Series, SeriesList) \
        .join(SeriesList, SeriesList.media_id == Series.id) \
        .filter(SeriesList.user_id == user.id)

    anime_data = db.session.query(Anime, AnimeList) \
        .join(AnimeList, AnimeList.media_id == Anime.id) \
        .filter(AnimeList.user_id == user.id)

    movies_data = db.session.query(Movies, MoviesList) \
        .join(MoviesList, MoviesList.media_id == Movies.id) \
        .filter(MoviesList.user_id == user.id)

    media_data = (('Series', series_data), ('Anime', anime_data),
                  ('Movies', movies_data))

    data = {}
    for prefix, media in media_data:
        is_movies = prefix == 'Movies'

        genres_time = {}
        periods_time = new_counters(period_buckets)
        episodes_time = new_counters(episode_buckets)
        movies_time = new_counters(runtime_buckets)

        for element in media:
            # Number of episodes and the time watched by element
            episodes_watched, time_watched = get_episodes_and_time(element)

            # Genres stats
            for genre in [m.genre for m in element[0].genres]:
                if genre not in genres_time:
                    genres_time[genre] = time_watched
                else:
                    genres_time[genre] += time_watched

            # Period stats: first_air_date when usable, else release_date;
            # 0 (counted in no period) when neither can be parsed.
            try:
                airing_year = year_of(element[0].first_air_date)
            except (AttributeError, ValueError, TypeError):
                try:
                    airing_year = year_of(element[0].release_date)
                except (AttributeError, ValueError, TypeError):
                    airing_year = 0

            if airing_year >= 1960:
                count_in_bucket(periods_time, period_buckets, airing_year)

            # Episodes / time stats
            if is_movies:
                count_in_bucket(movies_time, runtime_buckets, time_watched)
            elif episodes_watched >= 1:
                count_in_bucket(episodes_time, episode_buckets,
                                episodes_watched)

        # Rename
        if prefix == 'Series':
            genres_time['Action/Adventure'] = genres_time.pop(
                'Action & Adventure', 0)
            genres_time['War/Politics'] = genres_time.pop('War & Politics', 0)
            genres_time['Sci-Fi/Fantasy'] = genres_time.pop(
                'Sci-Fi & Fantasy', 0)
            genres_time.pop('Unknown', 0)

        if all(x == 0 for x in genres_time.values()):
            genres_time = {}
        else:
            genres_time = sorted(genres_time.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)
        if all(x == 0 for x in periods_time.values()):
            periods_time = {}
        if all(x == 0 for x in episodes_time.values()):
            episodes_time = {}
        if all(x == 0 for x in movies_time.values()):
            movies_time = {}

        data[prefix + '_genres'] = genres_time
        data[prefix + '_periods'] = periods_time
        if is_movies:
            data['Movies_times'] = movies_time
        else:
            data[prefix + '_episodes'] = episodes_time

    return data