Example #1
    def test_no_count(self):
        """
        Test for an empty counter
        """

        ctr = enlighten.Counter(total=10, desc='Test', unit='ticks')
        formatted = ctr.format(width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test   0%\|[ ]+ \|  0/10 \[00:0\d<\?, 0.00 ticks/s\]')

        # No unit, no description
        ctr = enlighten.Counter(total=10)
        formatted = ctr.format(width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'  0%\|[ ]+ \|  0/10 \[00:0\d<\?, 0.00/s\]')
Example #2
	def pack(self, files):
		if not self.path:
			return False
			
		Print.info('\tRepacking to NSP...')
		
		hd = self.generateHeader(files)
		
		totalSize = len(hd) + sum(os.path.getsize(file) for file in files)
		if os.path.exists(self.path) and os.path.getsize(self.path) == totalSize:
			Print.info('\t\tRepack %s is already complete!' % self.path)
			return
			
		t = enlighten.Counter(total=totalSize, unit='B', desc=os.path.basename(self.path), leave=False)
		
		t.write('\t\tWriting header...')
		outf = open(self.path, 'wb')
		outf.write(hd)
		t.update(len(hd))
		
		done = 0
		for file in files:
			t.write('\t\tAppending %s...' % os.path.basename(file))
			with open(file, 'rb') as inf:
				while True:
					buf = inf.read(4096)
					if not buf:
						break
					outf.write(buf)
					t.update(len(buf))
		t.close()
		
		Print.info('\t\tRepacked to %s!' % outf.name)
		outf.close()
Example #3
def call_extract_multible_xml(args):
    '''
    A specific extractor for multiple XML files that keep their text in the 's' tag.
    The function extracts text from multiple files and exports .txt files named after
    the subfolders in the root folder.
    '''
    list_of_file_paths = get_file_directories(args)

    pbar = enlighten.Counter(total=len(list_of_file_paths),
                             desc='Extracting files')

    dir_info = directory_info(args)
    root_name = dir_info.get_root_name()
    name = get_subfolder_name(list_of_file_paths[0], root_name)

    list_to_create = []
    for path in list_of_file_paths:
        current_subfolder = get_subfolder_name(path, root_name)

        # Compare the current subfolder to name. Because list_of_file_paths is
        # ordered, this creates a file named after the subfolder and then
        # resets list_to_create
        if name != current_subfolder or len(list_to_create) > 500:
            filename = '.\\data\\05 - raw\\' + name + '.txt'
            create_file(list_to_create, filename, mode='a')
            list_to_create = []
            name = current_subfolder

        list_to_create.extend(xml_extractor(path))

        pbar.update()

    filename = '.\\data\\05 - raw\\' + name + '.txt'
    create_file(list_to_create, filename, mode='a')
Example #4
def load_demo(path_to_json_data, mongo_db, convert_to_ensembl=False):
    """Inserts demo patient data into database
        Demo data consists of a set of 50 patients from this paper: http://onlinelibrary.wiley.com/doi/10.1002/humu.22850

        Args:
            path_to_demo_data(str): absolute path to json file containing the demo patients.
            mongo_db(pymongo.database.Database)

        Returns:
            inserted_ids(list): the database ID of the inserted patients
    """
    patients = [] # a list of dictionaries
    inserted_ids = []

    # open the json file and try to insert one patient at a time
    try:
        LOG.info('reading patients file')
        with open(path_to_json_data) as json_data:
            patients = json.load(json_data)
            # create a progress bar
            pbar = enlighten.Counter(total=len(patients), desc='', unit='patients')
            for json_patient in patients:

                #parse patient into format accepted by database
                patient = mme_patient(json_patient, convert_to_ensembl)

                inserted_id = backend_add_patient(mongo_db=mongo_db, patient=patient)[1]
                if inserted_id:
                    inserted_ids.append(inserted_id)
                pbar.update()

    except Exception as err:
        LOG.fatal("An error occurred while importing benchmarking patients: {}".format(err))

    return inserted_ids
Example #5
def augment_data(args, data):
    # Augment data

    augmented_data = pd.DataFrame(columns=['wav_filename', 'wav_filesize', 'transcript', 'duration'])
    data_augment = sample(args, data, True)

    random.seed(args.augment_seed)

    shifts = [random.randint(1, 5) for _ in range (len(data_augment))]
    
    pbar = enlighten.Counter(total=len(data_augment), desc='Augmenting train set')

    for i, row in data_augment.iterrows():
        pbar.update()
        # Augment this sample and save it under augmented
        y, sr = manipulate(row['wav_filename'], shifts[i])
        _, filename = os.path.split(row['wav_filename'])
        filepath = os.path.join(args.malromur_dir, 'augmented', filename)
        sf.write(filepath, y, sr, 'PCM_16')
        
        # Create a new row
        new_row = row.copy()
        new_row['wav_filename'] = filepath
        new_row['wav_filesize'] = os.path.getsize(filepath)

        # Append the new row to augmented_data
        augmented_data = augmented_data.append(new_row, ignore_index=True)

    # Combine with the original data
    new_data = data.append(augmented_data, ignore_index=True)

    return new_data, augmented_data['duration'].sum()
Example #6
def remove_variants(conn, dataset, list_of_var_tuples):
    """
    Deletes variants from beacon
    """
    delete_counter = 0
    LOG.info('Deleting variants from database..')

    # loop over each variant tuple in the list:
    click.echo("variants to remove: %s (it might take some time!)" %
               len(list_of_var_tuples))
    pbar = enlighten.Counter(total=len(list_of_var_tuples),
                             desc='',
                             unit='ticks')

    for var_tuple in list_of_var_tuples:
        try:
            unique_key = dataset + "_" + str(var_tuple[0]) + "_" + str(
                var_tuple[1]) + "_" + var_tuple[2]
            # Remove 1 from the occurrence field if this is not the last occurrence
            sql = "update beacon_data_table set occurrence = occurrence -1 where chr_pos_alt_dset=%s"
            result = conn.execute(sql, unique_key)
            if result.rowcount > 0:
                delete_counter += 1
            pbar.update()

        except Exception as ex:
            print('Unexpected error:', ex)

    # delete all records with no samples associated:
    if delete_counter > 0:
        sql = "delete from beacon_data_table where occurrence = 0;"
        conn.execute(sql)
    return delete_counter
Example #7
 def setUp(self):
     os.environ['TERM'] = 'vt100'
     self.tty = MockTTY()
     self.manager = MockManager(stream=self.tty.stdout)
     self.ctr = enlighten.Counter(total=10, desc='Test', unit='ticks', manager=self.manager)
     self.manager.counters[self.ctr] = 3
     self.output = r'Test   0%\|[ ]+ \|  0/10 \[00:0\d<\?, 0.00 ticks/s\]'
Example #8
 def ___test_model___(self, clusters, model):
     self.logger.debug(f'Testing model: modelSize = {self.modelSize}, clusters = {self.clustersSize}, tskip|tsize = {self.trainSkip}|{self.trainSize}')
     pbar = enlighten.Counter(total=self.trainSize, desc='Tested', unit='matches')
     testResult = {'matches': 0, 'correct': 0, 'nodata': 0}
     for x in self.con["data"].find(skip=self.trainSkip, limit = self.trainSize):
         if any(player == {} or 'hero_id' not in player or player['hero_id'] in [0,None] for player in x['players']):
             pbar.update()
             continue
         key = []
         hkey = []
         for player in x['players']:
             heroId = player['hero_id']
             hkey.append(self.originalHeroes[heroId])
             key.append(clusters[self.originalHeroes[heroId]])
         radiant = sorted(key[0:5])
         dire = sorted(key[5:10])
         rd = repr(radiant+dire)
         dr = repr(dire+radiant)
         if dr in model:
             key = dr
         elif rd in model:
             key = rd
         else:
             testResult['nodata']+=1
             pbar.update()
             continue
         adv = self.___evaluate_advantage___(hkey)
         evaluation =  model[key]['radiantwin'] / model[key]['matches'] + adv*self.multiplier
         isRadiant = evaluation > self.activator
         testResult['matches']+=1
         testResult['correct']+=int(isRadiant == x['radiant_win'])
         pbar.update()
     self.logger.debug(f'Testing is done!')
     self.logger.debug(f'Accuracy: {testResult["correct"]/testResult["matches"]*100}; correct|tested: {testResult["correct"]}|{testResult["matches"]}; activator|mult: {self.activator}|{self.multiplier}; modelsize|cl = {self.modelSize}|{self.clustersSize}; tskip|tsize = {self.trainSkip}|{self.trainSize}.')
     return testResult
Example #9
 def ___train_model___(self, clusters):
     self.logger.debug(f'Training model: modelSize = {self.modelSize}, clusters = {self.clustersSize}.')
     path = os.path.join(os.path.dirname(__file__), '{}/{}_{}.pickle'.format('models',self.modelSize, self.clustersSize))
     if os.path.exists(path):
         self.logger.debug(f'Found cached data: MODEL/{self.modelSize}_{self.clustersSize} - {path}')
         return self.___deserialize___(path)
     else:   
         pbar = enlighten.Counter(total=self.modelSize, desc='Training', unit='matches')
         model = {}
         for x in self.con["data"].find(limit = self.modelSize):
             if any(player == {} or 'hero_id' not in player or player['hero_id'] in [0,None] for player in x['players']):
                 pbar.update()
                 continue
             key = []
             for player in x['players']:
                 heroId = player['hero_id']
                 key.append(clusters[self.originalHeroes[heroId]])
             radiant = sorted(key[0:5])
             dire = sorted(key[5:10])
             rd = repr(radiant+dire)
             dr = repr(dire+radiant)
             key = rd
             if dr in model:
                 key = dr
             elif rd in model:
                 key = rd
             else:
                 model[key] = {'matches' : 0, 'radiantwin': 0}
             model[key]['matches']+=1
             model[key]['radiantwin']+=int(x['radiant_win'])
             pbar.update()
         self.___serialize___('models','{}_{}'.format(self.modelSize, self.clustersSize), model)
         self.logger.debug(f'Done training model: modelSize = {self.modelSize}, clusters = {self.clustersSize}.')
         return model
Example #10
def testRange(activatorStart = 0.4, activatorEnd = 0.6, activatorStep = 0.005, 
        multiplierStart = 0.0, multiplierEnd = 2.0, multiplierStep = 0.02):
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    activator = np.arange(activatorStart, activatorEnd, activatorStep, dtype=np.dtype(float))
    multiplier = np.arange(multiplierStart, multiplierEnd, multiplierStep, dtype=np.dtype(float))
    X, Y = np.meshgrid(activator, multiplier)
    Z = np.copy(X)
    pbar = enlighten.Counter(total=np.size(Z,1)*np.size(Z,0), desc='Progress', unit='tests')
    for i in range(np.size(Z,1)):
        for j in range(np.size(Z,0)):
            model.set_params(X[0,i].item(), Y[j,i].item())
            trained = model.clusterize_train(2)
            Z[j,i] = accuracy(trained)
            pbar.update()

    surf = ax.plot_surface(X, Y, Z, cmap= cm.get_cmap("coolwarm"), rstride=1, cstride=1,
                        linewidth=0, antialiased=True)

    ax.set_zlim(46, 61)
    ax.set_xlabel('Activator')
    ax.set_xticks(activator)
    ax.set_ylabel('Multiplier')
    ax.set_yticks(multiplier)
    ax.set_zlabel('Prediction accuracy')
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

    fig.colorbar(surf, shrink=0.3, aspect=5)

    plt.show()
Example #11
 def _hero_matrix_(self):
     path = os.path.join(os.path.dirname(__file__), '{}/{}.pickle'.format('matrices',self.trainSkip))
     if os.path.exists(path):
         self.logger.debug(f'Found cached data: HEROMATRIX: {self.trainSkip} - {path}')
         self.matrix  = self.___deserialize___(path)
     else:   
         self.logger.debug(f'Building HEROMATRIX: {self.trainSkip} matches.')
         pbar = enlighten.Counter(total=self.trainSkip, desc='Heromatrix', unit='matches')
         matrix = [[{'matches': 0, 'won':0, 'winrate' : 0} for x in range(len(self.heroesArray))] for y in range(len(self.heroesArray))] 
         for x in self.con["data"].find(limit = self.trainSkip):
             if any(player == {} or 'hero_id' not in player or player['hero_id'] in [None,0] for player in x['players']):
                 pbar.update()
                 continue
             for i in range(0,5):
                 for j in range (i+1, 5):
                     self.___eval_matrix_match___(matrix, x, i, j, False)
                 for j in range (5,10):
                     a,b = self.originalHeroes[x['players'][i]['hero_id']], self.originalHeroes[x['players'][j]['hero_id']]
                     r,c = max(a,b), min(a,b)
                     matrix[r][c]['matches']+=1
                     matrix[r][c]['won']+= (a == r) == x['radiant_win']
             for i in range(5,9):
                 for j in range (i+1, 10):
                     self.___eval_matrix_match___(matrix, x, i, j, True)
             pbar.update()
         for i in range(0,len(self.heroesArray)):
             for j in range (i+1, len(self.heroesArray)):
                 matrix[i][j]['winrate'] = 0 if matrix[i][j]['matches'] == 0 else matrix[i][j]['won'] / matrix[i][j]['matches']
                 matrix[j][i]['winrate'] = 0 if matrix[j][i]['matches'] == 0 else matrix[j][i]['won'] / matrix[j][i]['matches']
         self.___serialize___('matrices', self.trainSkip, matrix)
         self.matrix = matrix
         self.logger.debug(f'Done building HEROMATRIX: {self.trainSkip} matches.')
     return self.matrix
Example #12
        def eliminateDuplicateVectors(database):
            '''
            Because it takes a lot of time, importing a directory does not
            eliminate duplicate vectors from the database.
            It can be done by calling this function manually.

            It eliminates duplicates for every class...
            '''

            all_processes = []
            allCatIds = list(database.keys())

            print('Optimizing database, printing approximate progress..')
            total_classes = len(self._database)
            serial_cat_id = allCatIds[np.argmax(
                [len(self._database[xx]) for xx in self._database])]
            parallel_cat_ids = list(set(allCatIds) - set([serial_cat_id]))

            bar = enlighten.Counter(total=len(self._database[serial_cat_id]))

            p = Pool(total_classes)
            # print(list(map(lambda catId : (database, catId, self._new_cat_threshold), allCatIds[:-1])))
            # a,b,c=list(map(lambda catId: [self, catId, bar], allCatIds))[0]
            # print(b)
            # pdb.set_trace()
            eliminateList = p.map_async(
                Identifier._eliminateDuplicateVectors_parallel,
                list(
                    map(
                        lambda catId: [
                            catId, self._database[catId], self.
                            _new_cat_threshold
                        ], parallel_cat_ids)))

            catId = serial_cat_id
            eliminateThose = []

            for index1, vector1 in enumerate(database[catId]):
                bar.update()
                for index2, vector2 in enumerate(database[catId]):

                    if index1 != index2 and np.linalg.norm(
                            vector1 - vector2
                    ) < self._new_cat_threshold:  # TODO : note this is static!
                        eliminateThose.append(index1)

            eliminateThose = list(np.unique(eliminateThose))

            p.close()
            p.join()
            eliminateList = eliminateList.get()
            eliminateList.append((serial_cat_id, eliminateThose))
            for identity_and_cat in eliminateList:
                catId, identity = identity_and_cat

                identity.sort(reverse=True)
                for el in identity:
                    database[catId].pop(el)
Example #13
    def test_full_bar(self):

        ctr = enlighten.Counter(total=10, desc='Test', unit='ticks')
        ctr.count = 10
        ctr.start = time.time() - 10
        formatted = ctr.format(width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted,
                         r'Test 100%\|' + u'█+' + r'\| 10/10 \[00:\d\d<00:00, \d.\d\d ticks/s\]')
Example #14
    def test_zero_total(self):
        """
        If the total is 0, the bar should be full
        """

        ctr = enlighten.Counter(total=0, desc='Test', unit='ticks')
        formatted = ctr.format(width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test 100%\|' u'█+' + r'\| 0/0 \[00:0\d<00:00, 0.00 ticks/s\]')
Example #15
def process_files():
    """
    Process files with a single progress bar
    """

    with enlighten.Counter(total=100, desc='Simple', unit='ticks') as pbar:
        for _ in range(100):
            time.sleep(0.05)
            pbar.update()
Example #16
def process_files():
    """
    Process files with a single progress bar
    """

    with enlighten.Counter(total=100, desc='Simple', unit='ticks') as pbar:
        for num in range(100):  # pylint: disable=unused-variable
            time.sleep(0.05)
            pbar.update()
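
The two snippets above drive a single standalone Counter. When several bars are needed at once, enlighten is normally used through a Manager; below is a minimal sketch of that pattern (the totals, descriptions, and sleep time are illustrative, not taken from the examples on this page):

import time

import enlighten

# One manager owns the output stream; each counter gets its own line.
manager = enlighten.get_manager()
files = manager.counter(total=3, desc='Files', unit='files')
lines = manager.counter(total=300, desc='Lines', unit='lines')

for _ in range(3):
    for _ in range(100):
        time.sleep(0.01)  # simulate work
        lines.update()
    files.update()

manager.stop()  # stop the manager and restore the terminal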
Example #17
    def test_format_no_total(self):

        # No unit, No desc
        ctr = enlighten.Counter()
        self.assertRegex(ctr.format(width=80), r'0 \[00:0\d, 0.00/s\]')
        ctr.count = 50
        ctr.start = time.time() - 50
        self.assertRegex(ctr.format(width=80), r'50 \[00:5\d, \d.\d\d/s\]')

        # With unit and description
        ctr = enlighten.Counter(desc='Test', unit='ticks')
        rtn = ctr.format(width=80)
        self.assertEqual(len(rtn), 80)
        self.assertRegex(rtn, r'Test 0 ticks \[00:0\d, 0.00 ticks/s\]')
        ctr.count = 50
        ctr.start = time.time() - 50
        rtn = ctr.format(width=80)
        self.assertEqual(len(rtn), 80)
        self.assertRegex(rtn, r'Test 50 ticks \[00:5\d, \d.\d\d ticks/s\]')
Example #18
    def PerformutationFeatureImportance(self):
        if self.data.shape[0]>100000:
            data = self.data.sample(n=100000,axis=0)
        else:
            data = self.data
        inputsLL = np.hsplit(data[self.inputs].astype(np.float32).values,len(self.inputs))
        has_LBN = any([l.__class__.__name__ == 'LBNLayer' for l in self.model.layers])
        if has_LBN:
            inputsLBN = data[self.LBN_inputs].astype(np.float32).values.reshape(-1,4,len(self.LBN_inputs)//4)
            inputs = (inputsLL,inputsLBN)
        else:
            inputs = inputsLL
        logging.info("Producing output for true score")
        outputs = self.model.predict(inputs,batch_size=10000,verbose=0)
        targets = data[self.outputs].values
        true_F1_score = f1_score(targets.argmax(1),outputs.argmax(1),average='macro')

        permutations = 10
        f1_scores = np.zeros(len(self.inputs))
        f1_scores_err = np.zeros(len(self.inputs))
        logging.info("Producing output for permutation scores")
        pbar = enlighten.Counter(total=len(self.inputs), desc='Permutations', unit='Input') 
        for idxPerm,inputName in enumerate(self.inputs):
            pbar.update()
            logging.info("Looking at input %d/%d"%(idxPerm,len(self.inputs)))
            inputs_perm = copy.deepcopy(inputsLL)
            perm_f1_scores = []
            for perm in range(permutations):
                logging.info("... Permutation %d/%d"%(perm,permutations))
                np.random.shuffle(inputs_perm[idxPerm])
                if has_LBN:
                    outputs_perm = self.model.predict((inputs_perm,inputsLBN),batch_size=10000,verbose=0)
                else:
                    outputs_perm = self.model.predict(inputs_perm,batch_size=10000,verbose=0)
                perm_f1_scores.append(f1_score(targets.argmax(1),outputs_perm.argmax(1),average='macro'))
            perm_f1_scores = np.array(perm_f1_scores)
            f1_scores[idxPerm] = abs(perm_f1_scores.mean()-true_F1_score)/true_F1_score
            f1_scores_err[idxPerm] = perm_f1_scores.std()
        
        idxSort = np.flip(np.argsort(f1_scores))
        inputNames = np.array(self.inputs,dtype=object)[idxSort]
        f1_scores = f1_scores[idxSort]
        f1_scores_err = f1_scores_err[idxSort]

        fig, ax = plt.subplots(figsize=(8,15))
        plt.subplots_adjust(left=0.15, right=0.95, top=0.95, bottom=0.1)
        y_pos = np.arange(len(self.inputs))
        ax.barh(y_pos,f1_scores,xerr=f1_scores_err,align='center')
        ax.set_yticks(y_pos)
        ax.set_yticklabels(inputNames,size=int(80/math.sqrt(len(self.inputs))))
        ax.invert_yaxis()  # labels read top-to-bottom
        ax.set_xlabel('Importance',fontsize=18)
        name = os.path.join(self.path_out,'feature_importance.png')
        fig.savefig(name)
        logging.info("... saved as %s"%name)
Example #19
    def test_direct(self):
        ctr = enlighten.Counter(stream=self.tty.stdout, total=100, desc='Test', unit='ticks')
        self.assertIsInstance(ctr.manager, enlighten.Manager)
        ctr.start = time.time() - 50
        ctr.update(50, force=True)

        self.tty.stdout.write('X\n')
        value = self.tty.stdread.readline()
        if enlighten.NEEDS_UNICODE_HELP:
            value = value.decode('utf-8')

        self.assertRegex(value, r'Test  50%\|' + u'█+[▏▎▍▌▋▊▉]?' +
                         r'[ ]+\|  50/100 \[00:5\d<00:5\d, \d.\d\d ticks/s\]X\n')

        with mock.patch.object(self.tty, 'stdout', wraps=self.tty.stdout) as mockstdout:
            ctr = enlighten.Counter(stream=self.tty.stdout, total=100, desc='Test', unit='ticks')
            ctr.refresh(flush=False)
            self.assertFalse(mockstdout.flush.called)
            ctr.refresh(flush=True)
            self.assertTrue(mockstdout.flush.called)
Example #20
    def test_custom_series(self):
        ctr = enlighten.Counter(total=100, desc='Test', unit='ticks', series=[' ', '>', '-'])
        ctr.count = 50
        formatted = ctr.format(elapsed=50, width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test  50%\|' + u'-+[>]?' +
                         r'[ ]+\|  50/100 \[00:5\d<00:5\d, \d.\d\d ticks/s\]')

        ctr.count = 13
        formatted = ctr.format(elapsed=13, width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test  13%\|' + u'---->' +
                         r'[ ]+\|  13/100 \[00:1\d<01:\d\d, \d.\d\d ticks/s\]')

        ctr = enlighten.Counter(total=100, desc='Test', unit='ticks', series=[u'⭘', u'⬤'])
        ctr.count = 50
        formatted = ctr.format(elapsed=50, width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test  50%\|' + u'⬤+⭘+' +
                         r'\|  50/100 \[00:5\d<00:5\d, \d.\d\d ticks/s\]')
Example #21
    def test_format_count_gt_total(self):
        """
        Counter should fall back to no-total format if count is greater than total
        """

        ctr = enlighten.Counter(total=10, desc='Test', unit='ticks')
        ctr.count = 50
        ctr.start = time.time() - 50
        rtn = ctr.format(width=80)
        self.assertEqual(len(rtn), 80)
        self.assertRegex(rtn, r'Test 50 ticks \[00:5\d, \d.\d\d ticks/s\]')
Example #22
def process_files(count=None):
    """
    Process files with a single progress bar
    """

    pbar = enlighten.Counter(total=count, desc='Simple', unit='ticks',
                             bar_format=BAR_FMT, counter_format=COUNTER_FMT)

    for _ in range(100):
        time.sleep(0.05)
        pbar.update(1.1)
Example #23
def fetch_urls(browser, max_scrolls, scroll_wait_time, logger):
    """Catch all the pictures links of the Instagram profile."""
    links = []
    links.extend(re.findall("/p/([^/]+)/", browser.page_source))

    logger.info("Scrolling the Instagram, scraping pictures URLs ... ")

    pbar = enlighten.Counter(total=max_scrolls,
                             desc='Scrolling',
                             unit='scrolls')

    try:
        prev_link_batch = None
        scroll_count = 0
        while scroll_count < max_scrolls:
            page_source = browser.page_source

            # check if the page still shows the Loading icon, i.e. there are still new images to load
            if "Loading..." not in page_source:
                logger.info(
                    f"`Loading...` string not found, so we have probably reached the end of the page. Stopping scrolling. (scroll count: {scroll_count})"
                )
                break

            # scroll
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight)")
            link_batch = re.findall("/p/([^/]+)/", page_source)
            if link_batch == prev_link_batch:
                logger.warning("Ineffective scroll")
            else:
                links.extend(link_batch)
                scroll_count += 1
                pbar.update()
            prev_link_batch = link_batch

            # sleep between scrolls
            time.sleep(scroll_wait_time)
        if max_scrolls == scroll_count:
            logger.info(
                f"Reached max_scrolls. Stop scrolling. (scroll count: {max_scrolls})"
            )
    except KeyboardInterrupt:
        logger.warning(
            'KeyboardInterrupt. Stopping scrolling; will continue with the rest.'
        )
    except Exception as e:
        logger.warning('Error occurred while scrolling: %s', e)
        logger.warning(
            "But we will continue with what we have scraped so far ;)")
    logger.info("Pictures links collected")
    return list(set(links))  # remove duplicates
Example #24
def add_baitset(file, db_uri, email, panel_name, version, genomic_build):

    LOG.info("saving baitset {}.{} to database".format(panel_name, version))

    client = MongoClient(db_uri)
    db_name = db_uri.split('/')[-1]
    db = client[db_name]
    adapter = PanelAdapter(client=client, db_name=db_name)

    created_baitset_id = adapter.add_baitset(name=panel_name,
                                             version=version,
                                             build=genomic_build)

    inserted_baits = 0

    # if baitset creation was successful, insert baits
    if created_baitset_id:

        # obtain all baits from this baitset, formatted as objects
        baits_list = baits(path_to_file=file,
                           created_baitset_id=created_baitset_id)

        # insert one bait at a time into the db
        pbar = enlighten.Counter(total=len(baits_list), desc='', unit='ticks')
        for bait in baits_list:
            try:
                # add the bait
                inserted_bait_id = adapter.add_bait(bait)
                inserted_baits += 1

            except pymongo.errors.DuplicateKeyError:
                # if bait exists do nothing
                LOG.info('pymongo DuplicateKeyError')

            finally:
                # update baitset with list of baits contained in it
                updated_baitset = db.baitset.find_one_and_update(
                    {'_id': created_baitset_id},
                    {'$push': {
                        'baits': bait['_id']
                    }},
                    upsert=True)
            pbar.update()

        LOG.info(
            'created baitset with ID {0}. Inserted {1} out of {2} new available baits into db'
            .format(created_baitset_id, inserted_baits, len(baits_list)))
    else:
        LOG.error(
            "Something went wrong and the baitset coudn't be saved to db.")
Example #25
def dump_collection(db, collection):
    filename = f'{db.name}({collection})-{datetime.now()}.json'
    f = open(filename, 'w')

    print(filename)

    collection = db[collection]
    pbar = enlighten.Counter(total=collection.find({}).count(),
                             desc='Basic',
                             unit='documents')
    cursor = collection.find({})
    for document in cursor:
        f.write(f'{json.dumps(document, default=str, ensure_ascii=False)}\n')
        pbar.update()

    pbar.close()
    f.close()
Example #26
def main(argv):
    # Process args.
    parser = argparse.ArgumentParser(
        prog="check_dif_statuses",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "repo_top",
        help=
        """Relative path to where the OpenTitan repository is checked out.""")
    parser.add_argument(
        "--show-unimplemented",
        action="store_true",
        help="""Show unimplemented functions for each incomplete DIF.""")
    parser.add_argument("--table-format",
                        type=str,
                        choices=["grid", "github", "pipe"],
                        default="grid",
                        help="""Format to print status tables in.""")
    args = parser.parse_args(argv)

    # Define root path of DIFs.
    difs_root_path = os.path.join("sw", "device", "lib", "dif")

    # Get list of all DIF basenames.
    # TODO: automatically get the list below by cross referencing DIF names
    # with IP block names. Hardcoded for now.
    shared_headers = ["dif_warn_unused_result"]
    difs = get_list_of_difs(difs_root_path, shared_headers)

    # Get DIF statuses (while displaying a progress bar).
    dif_statuses = []
    progress_bar = enlighten.Counter(total=len(difs),
                                     desc="Analyzing statuses of DIFs ...",
                                     unit="DIFs")
    for dif in difs:
        dif_statuses.append(DIFStatus(args.repo_top, difs_root_path, dif))
        progress_bar.update()

    # Build table and print it to STDOUT.
    print_status_table(dif_statuses, args.table_format)
    if args.show_unimplemented:
        print_unimplemented_functions(dif_statuses, args.table_format)
Example #27
def run_task_loop(sqs, sns):
    first_attempt = True
    notified = False
    counter = enlighten.Counter(desc='Batches processed', unit='batches')
    while True:
        found_msgs = False  # flag to check if any msgs were read in an attempt
        for task_body, task_handle in get_tasks(sqs=sqs):
            process(task_body)
            delete_task(sqs, task_handle)
            found_msgs = True
            notified = False
            first_attempt = False

        if not found_msgs and not notified and not first_attempt:
            # if no more tasks are present, i.e. a batch has been completely processed, send a notification
            notify(sns=sns)
            notified = True
            counter.update()

        time.sleep(0.1)
Example #28
def _download_file(link, filename, chunk_size, progress):
    with session.get(link, stream=True,
                     allow_redirects=True) as resp, open(filename,
                                                         "wb") as out:
        resp.raise_for_status()
        if resp.url != link:
            logger.info(f"Original link: {link}")
            logger.info(f"Resolved link: {resp.url}")

        total_size = int(resp.headers.get("content-length", "0"))
        pbar = enlighten.Counter(total=total_size,
                                 desc=filename,
                                 enabled=progress,
                                 unit="B",
                                 min_delta=0.5)

        for chunk in resp.iter_content(chunk_size=chunk_size):
            out.write(chunk)
            pbar.update(len(chunk))
    return total_size
Example #29
    def test_floats(self):
        """
        Using floats for total and count is supported by the logic, but not by the
        default format strings
        """

        ctr = enlighten.Counter(total=100.2, desc='Test', unit='ticks', min_delta=500)
        ctr.update(50.1)
        self.assertEqual(ctr.count, 50.1)

        # Won't work with default formatting
        with self.assertRaises(ValueError):
            formatted = ctr.format(elapsed=50.1)

        ctr.bar_format = u'{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:.1f}/{total:.1f} ' + \
                         u'[{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'

        formatted = ctr.format(elapsed=50.1, width=80)
        self.assertEqual(len(formatted), 80)
        self.assertRegex(formatted, r'Test  50%\|' + u'█+' +
                         r'[ ]+\| 50.1/100.2 \[00:5\d<00:5\d, \d.\d\d ticks/s\]')
Example #30
 def ___find_averages___(self, data):
     logmsg = 'evaluating average hero statistics.'
     self.logger.debug(f'Starting: {logmsg}')
     pbar = enlighten.Counter(total=len(data['values']), desc='Averages', unit='hero')
     properties = playerOnly.__len__()
     averages = [{ 'data': np.zeros(properties), 'amount' : 0 } for f in repeat(None, self.heroesArray.__len__())]
     finalData = [np.zeros(properties) for f in repeat(None, self.heroesArray.__len__())]
     for i in range(len(data['values'])):
         heroId = data['heroes'][i]
         if not (heroId is None):
             entry = averages[heroId]
             for k in range(properties):
                 entry['data'][k] += data['values'][i, k]
             entry['amount']+=1
         pbar.update()
     for i in range(len(averages)):
         entry = averages[i]
         if entry['amount'] != 0:
             for k in range(properties):
                 finalData[i][k] = entry['data'][k] / entry['amount']
     self.logger.debug(f'Done: {logmsg}')
     return finalData