def update_choices():
    print('generating filter choices...')
    events = models.event.objects.all()
    choice_count = len(event_choices) + len(injection_choices) + \
        len(result_choices)
    progress_bar = ProgressBar(
        max_value=choice_count,
        widgets=[
            Percentage(), ' (',
            SimpleProgress(format='%(value)d/%(max_value)d'), ') ',
            Bar(), ' ',
            Timer()
        ])
    count = 0
    for attribute in event_choices:
        count += 1
        progress_bar.update(count)
        event_choices[attribute] = choices(events, attribute)
    injections = models.injection.objects.all()
    for attribute in injection_choices:
        count += 1
        progress_bar.update(count)
        injection_choices[attribute] = choices(injections, attribute)
    results = models.result.objects.all()
    for attribute in result_choices:
        count += 1
        progress_bar.update(count)
        result_choices[attribute] = choices(results, attribute)
    print()
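# Minimal self-contained sketch of the progressbar2 widget layout used in
# update_choices() above and reused by several functions below; the loop body
# and the total of 10 steps are placeholders, not part of the original code.
def _progress_demo(total=10):
    bar = ProgressBar(
        max_value=total,
        widgets=[
            Percentage(), ' (',
            SimpleProgress(format='%(value)d/%(max_value)d'), ') ',
            Bar(), ' ',
            Timer()
        ])
    for n in range(1, total + 1):
        bar.update(n)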
def update_hashes(options):
    campaign = get_campaign(options)
    progress_bar = ProgressBar(
        max_value=campaign.result_set.count(),
        widgets=[
            Percentage(), ' (',
            SimpleProgress(format='%(value)d/%(max_value)d'), ') ',
            Bar(), ' ',
            Timer()
        ])
    count = 0
    with open(
            'campaign-data/{}/gold/{}'.format(campaign.id,
                                              campaign.output_file),
            'rb') as solution_file:
        solution_contents = solution_file.read()
        gold_hash = sha256(solution_contents).hexdigest()
    for result in campaign.result_set.all():
        if result.data_diff == 1.0:
            result.data_hash = gold_hash
            result.save()
        else:
            result_location = 'campaign-data/{}/results/{}/{}'.format(
                campaign.id, result.id, campaign.output_file)
            if exists(result_location):
                with open(result_location, 'rb') as result_file:
                    result_contents = result_file.read()
                result.data_hash = sha256(result_contents).hexdigest()
                result.save()
        count += 1
        progress_bar.update(count)
def get_attributes_from_nodes(tree):
    progress = ProgressBar(widgets=['Parsing Commit Information', Bar()],
                           maxval=len(tree) + len(tree.edges())).start()
    new_graph = nx.DiGraph()
    for node in tree.nodes():
        diff = ''
        if 'diff' in tree.node[node]:
            diff = tree.node[node]['diff']
        committed_date = node.committed_date
        time.timezone = int(node.committer_tz_offset)
        committed_date = time.gmtime(committed_date)
        authored_date = node.authored_date
        time.timezone = int(node.author_tz_offset)
        authored_date = time.gmtime(authored_date)
        attributes = dict(authored_date=parse_time(authored_date),
                          author=node.author.email,
                          committer=node.committer.email,
                          committed_date=parse_time(committed_date),
                          message=node.message,
                          diff=diff,
                          hexsha=node.hexsha)
        new_graph.add_node(node.hexsha, **attributes)
        progress.update(progress.currval + 1)
    for edge in tree.edges():
        new_graph.add_edge(edge[0].hexsha, edge[1].hexsha)
        progress.update(progress.currval + 1)
    progress.finish()
    return new_graph
def backup(options):

    def traverse_directory(directory, archive=None, progress=None):
        num_items = 0
        for item in listdir(directory):
            if isdir(join(directory, item)):
                num_items += traverse_directory(join(directory, item),
                                                archive, progress)
            else:
                num_items += 1
                if archive is not None:
                    try:
                        archive.add(join(directory, item))
                    except FileNotFoundError:
                        pass
                if progress is not None:
                    progress[0] += 1
                    progress[1].update(progress[0])
        return num_items

    # def backup(options):
    if not exists('backups'):
        mkdir('backups')
    if not exists('campaign-data'):
        mkdir('campaign-data')
    sql_backup = 'campaign-data/{}.sql'.format(options.db_name)
    print('dumping database...')
    backup_database(options, sql_backup)
    print('database dumped')
    if options.files:
        backup_name = 'backups/{}_{}'.format(
            '-'.join(['{:02}'.format(unit)
                      for unit in datetime.now().timetuple()[:3]]),
            '-'.join(['{:02}'.format(unit)
                      for unit in datetime.now().timetuple()[3:6]]))
        num_items = 0
        directories = ['campaign-data']
        if exists('simics-workspace/gold-checkpoints'):
            directories.append('simics-workspace/gold-checkpoints')
        print('discovering files to archive')
        for directory in directories:
            num_items += traverse_directory(directory)
        print('archiving files...')
        with open_tar('{}.tar.gz'.format(backup_name), 'w:gz') \
                as backup, ProgressBar(
                    max_value=num_items,
                    widgets=[
                        Percentage(), ' (',
                        SimpleProgress(format='%(value)d/%(max_value)d'),
                        ') ', Bar(), ' ', Timer()
                    ]) as progress_bar:
            progress = [0, progress_bar]
            for directory in directories:
                traverse_directory(directory, backup, progress)
        remove(sql_backup)
    print('backup complete')
def get_cloned_repositories(directory):
    directories = os.listdir(directory)
    repositories = []
    progress = ProgressBar(
        widgets=['Getting Cloned Repository File Paths', Bar()])
    for directory in progress(directories):
        curr_directory = os.getcwd() + '/' + directory
        repo_name = os.listdir(curr_directory)[0]
        repositories.append(git.Repo(curr_directory + '/' + repo_name))
    return repositories
def clone_all_repositories(urls):
    clone_urls = []
    p = re.compile(r'(.*/{2})(.*\.com/[^/]+)(/[^/]+)(/[^/]+)(/[^/]+$)')
    for url in urls:
        clone_urls.append(p.sub(r'\g<1>github.com\g<3>\g<4>.git', url))
    temp_path = tempfile.mkdtemp(dir=os.getcwd())
    os.chdir(temp_path)
    progress = ProgressBar(widgets=['Cloning repositories', Bar()])
    for url in progress(clone_urls):
        os.chdir(tempfile.mkdtemp(dir=temp_path))
        git.Git().clone(url)
        os.chdir(temp_path)
    return temp_path
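# Worked example of the clone-URL rewrite performed above. The sample input is
# an assumption, modeled on the API URLs built by get_commit_urls() below:
#   >>> p.sub(r'\g<1>github.com\g<3>\g<4>.git',
#   ...       'https://api.github.com/repos/user/repo/commits')
#   'https://github.com/user/repo.git'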
def get_commit_urls(forks, user, repository_name):
    repos = [
        u'https://api.github.com/repos/' + user + u'/' + repository_name +
        u'/commits'
    ]
    p = re.compile(r'(\S+/{2})(\S+/)')
    progress = ProgressBar(widgets=['Creating Clone Urls', Bar()])
    if len(forks) > 0:
        for fork in progress(forks):
            matches = p.findall(fork)[0]
            repos.append(matches[0] + matches[1] + 'commits')
    else:
        progress.start()
        progress.finish()
    return repos
def __init__(self, content_length):
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
    scaled, power = scale_1024(content_length, len(prefixes))
    size_txt = '{:.1f} {:s}B'.format(scaled, prefixes[power])
    widgets = [
        DataSize(),
        FormatCustomText(' of {:s} | '.format(size_txt)),
        AdaptiveTransferSpeed(samples=100),
        FormatCustomText(' '),
        Bar(),
        FormatCustomText(' '),
        Percentage(),
        FormatCustomText(' | '),
        AdaptiveETA(samples=100)
    ]
    super(FileTransferProgressBar, self).__init__(max_value=content_length,
                                                  widgets=widgets)
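# Hypothetical usage sketch for FileTransferProgressBar above: stream a
# download and advance the bar by the number of bytes received. The requests
# calls, helper name and chunk size are assumptions, not part of the original.
def _download_with_progress(url, dest_path, chunk_size=8192):
    import requests
    response = requests.get(url, stream=True)
    content_length = int(response.headers.get('Content-Length', 0))
    bar = FileTransferProgressBar(content_length)
    received = 0
    with open(dest_path, 'wb') as out_file:
        for chunk in response.iter_content(chunk_size=chunk_size):
            out_file.write(chunk)
            received += len(chunk)
            bar.update(received)
    bar.finish()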
def update_with_diffs_from_clones(tree):
    progress = ProgressBar(widgets=['Processing Diff Information', Bar()],
                           maxval=len(tree)).start()
    root_index = tree.graph['root_index']
    queue = deque()
    parent = tree.nodes()[root_index]
    tree.node[parent]['diff'] = ''
    queue.append(parent)
    visited = set()
    while len(queue) > 0:
        parent = queue.popleft()
        if parent not in visited:
            visited.add(parent)
            if progress.currval < len(tree):
                progress.update(progress.currval + 1)
            for commit in tree[parent]:
                if commit not in visited:
                    queue.append(commit)
                    diff_result = ''
                    try:
                        if os.getcwd() != tree.node[commit]['repo_path']:
                            os.chdir(tree.node[commit]['repo_path'])
                        p = subprocess.Popen(
                            ["git", "diff", parent.hexsha, commit.hexsha],
                            stdout=subprocess.PIPE)
                        diff_result = p.communicate()[0]
                        diff_result = diff_result.decode('utf8', 'ignore')
                        diff_result = process_diff(diff_result)
                    except UnicodeDecodeError as e:
                        print e
                        exit(1)
                    tree.node[commit]['diff'] = diff_result
    progress.finish()
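# Alternative sketch (not the original approach): GitPython's command wrapper
# can produce the same diff text without spawning subprocess.Popen directly.
# `repo` is an assumption standing in for git.Repo(tree.node[commit]['repo_path']).
#   diff_text = repo.git.diff(parent.hexsha, commit.hexsha)
#   diff_result = process_diff(diff_text)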
def create_commit_tree_structures_for_forks(repositories):
    progress = ProgressBar(
        widgets=['Creating commit history for forks', Bar()])
    tree = nx.DiGraph()
    root_index = -1
    for repository in progress(repositories):
        commits = []
        origin = repository.remote('origin')
        for branch in origin.refs:
            commits.append(branch.commit)
        for commit in commits:
            temp_root_index = update_tree(tree, commit, repository.git_dir)
            if root_index == -1 and temp_root_index != -1:
                root_index = temp_root_index
    tree.graph['root_index'] = root_index
    return tree
def injections_page(request, campaign_id=None):
    if campaign_id is not None:
        campaign = models.campaign.objects.get(id=campaign_id)
        campaign_items_ = campaign_items
        injections = models.injection.objects.filter(
            result__campaign_id=campaign_id)
    else:
        campaign = None
        campaign_items_ = None
        injections = models.injection.objects.all()
    injection_filter = filters.injection(request.GET, queryset=injections)
    error_title = None
    error_message = None
    if not injection_filter.qs.count() and injections.count():
        error_title = 'Filter Error'
        error_message = 'Filter did not return any injections and was ignored.'
        injection_filter = filters.injection(None, queryset=injections)
    injections = injection_filter.qs
    print('filtering for failed registers...')
    failed_registers = []
    all_regs = injections.values_list('register', flat=True).distinct()
    progress_bar = ProgressBar(
        max_value=all_regs.count(),
        widgets=[
            Percentage(), ' (',
            SimpleProgress(format='%(value)d/%(max_value)d'), ') ',
            Bar(), ' ',
            Timer()
        ])
    for count, register in enumerate(all_regs, start=1):
        progress_bar.update(count)
        reg_injections = injections.filter(register=register)
        if reg_injections.filter(success=True).count():
            continue
        failed_bits = reg_injections.values_list('bit', flat=True).distinct()
        if len(failed_bits) != max(failed_bits) - 1:
            continue
        failed_registers.append(register)
    print()
    injections = injections.filter(register__in=failed_registers)
    if injections.count() > 0:
        chart_data, chart_list = injections_charts(injections)
        chart_list = sorted(chart_list, key=lambda x: x['order'])
    else:
        chart_data = None
        chart_list = None
    injection_table = tables.injections(injections)
    RequestConfig(request, paginate={
        'per_page': table_length
    }).configure(injection_table)
    return render(request, 'injections.html', {
        'campaign': campaign,
        'campaign_items': campaign_items_,
        'chart_data': chart_data,
        'chart_list': chart_list,
        'error_message': error_message,
        'error_title': error_title,
        'filter': injection_filter,
        'injection_count': '{:,}'.format(injections.count()),
        'injection_table': injection_table,
        'navigation_items': navigation_items
    })
def migrate(srchost, srchostauth, srchostport, dsthost, dsthostauth,
            dsthostport, srchostcacert, dsthostcacert, sslsrc, ssldst, db,
            flush, dryrun):
    if srchost == dsthost:
        print('Source and destination must be different.')
        return

    source = redis.StrictRedis(host=srchost, port=int(srchostport), db=db,
                               password=srchostauth, ssl=sslsrc,
                               ssl_ca_certs=srchostcacert)
    dest = redis.StrictRedis(host=dsthost, port=int(dsthostport), db=db,
                             password=dsthostauth, ssl=ssldst,
                             ssl_ca_certs=dsthostcacert)

    if flush and not dryrun:
        dest.flushdb()

    size = source.dbsize()
    if size == 0:
        print('No keys found.')
        return

    progress_widgets = [
        '%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()
    ]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size
    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0
    while True:
        cursor, keys = source.scan(cursor, count=COUNT)
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            # Set TTL to 0 for RESTORE, since PTTL returns -1 when the key
            # exists but has no TTL assigned.
            if ttl == -1:
                ttl = 0
            if data is not None:
                if dryrun:
                    # Execute a generic operation to determine whether the key
                    # exists. This is used in place of 'restore' to help
                    # generate the 'already_existing' summary statistic.
                    pipeline.type(key)
                else:
                    pipeline.restore(key, ttl, data)
            else:
                non_existing += 1

        results = pipeline.execute(False)
        for key, result in zip(keys, results):
            if dryrun:
                # If 'type' returned something other than 'none', the key
                # exists. If 'flush' is enabled, pretend the key doesn't exist
                # to avoid a non-zero 'already_existing' count on dryrun flush.
                if not flush and result != b'none':
                    already_existing += 1
            elif result != b'OK':
                e = result
                if hasattr(e, 'args') and (
                        e.args[0] == 'BUSYKEY Target key name already exists.'
                        or e.args[0] == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    print('Key failed:', key, 'data', result)
                    raise e

        if cursor == 0:
            break
        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()
    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
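# Minimal sketch of the DUMP/PTTL/RESTORE round-trip that the pipelines above
# batch together, shown for a single key. The helper name is an assumption;
# dump(), pttl() and restore() are standard redis-py commands.
def _copy_one_key(source, dest, key):
    payload = source.dump(key)  # serialized value, or None if the key vanished
    if payload is None:
        return False
    ttl = source.pttl(key)
    if ttl < 0:                 # PTTL returns -1 (or -2) when no TTL is set
        ttl = 0
    dest.restore(key, ttl, payload)
    return True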
def compute_availabilitymatrix(cutout, shapes, excluder, nprocesses=None,
                               disable_progressbar=False):
    """
    Compute the eligible share within cutout cells in the overlap with shapes.

    For parallel calculation (nprocesses not None) the excluder must not be
    initialized and all raster references must be strings. Otherwise
    processes are colliding when reading from one common
    rasterio.DatasetReader.

    Parameters
    ----------
    cutout : atlite.Cutout
        Cutout which the availability matrix is aligned to.
    shapes : geopandas.Series/geopandas.DataFrame
        Geometries for which the availabilities are calculated.
    excluder : atlite.gis.ExclusionContainer
        Container of all meta data or objects which to exclude, i.e.
        rasters and geometries.
    nprocesses : int, optional
        Number of processes to use for calculating the matrix. The
        parallelization can heavily boost the calculation speed. The default
        is None.
    disable_progressbar : bool, optional
        Disable the progressbar if nprocesses is not None. Then the `map`
        function instead of the `imap` function is used for the
        multiprocessing pool. This speeds up the calculation.

    Returns
    -------
    availabilities : xr.DataArray
        DataArray of shape (|shapes|, |y|, |x|) containing all the eligible
        share of cutout cell (x, y) in the overlap with shape i.

    Notes
    -----
    The rasterio (or GDAL) average downsampling returns different results
    dependent on how the target raster (the cutout raster) is spanned.
    Either it is spanned from the top left going downwards, e.g.
    Affine(0.25, 0, 0, 0, -0.25, 50), or starting in the lower left corner
    and going up, e.g. Affine(0.25, 0, 0, 0, 0.25, 50). Here we stick to the
    top-down version, which is why we use `cutout.transform_r` and flip the
    y-axis in the end.
    """
    availability = []
    shapes = shapes.geometry if isinstance(shapes, gpd.GeoDataFrame) else shapes
    shapes = shapes.to_crs(excluder.crs)

    progress = SimpleProgress(format='(%s)' % SimpleProgress.DEFAULT_FORMAT)
    widgets = [
        Percentage(), ' ', progress, ' ', Bar(), ' ', Timer(), ' ', ETA()
    ]
    progressbar = ProgressBar(prefix='Compute availability matrix: ',
                              widgets=widgets, max_value=len(shapes))

    args = (excluder, cutout.transform_r, cutout.crs, cutout.shape)
    if nprocesses is None:
        for i in progressbar(shapes.index):
            _ = shape_availability_reprojected(shapes.loc[[i]], *args)[0]
            availability.append(_)
    else:
        assert excluder.all_closed, ('For parallelization all raster files '
                                     'in excluder must be closed')
        kwargs = {
            'initializer': _init_process,
            'initargs': (shapes, *args),
            'maxtasksperchild': 20,
            'processes': nprocesses
        }
        with mp.Pool(**kwargs) as pool:
            if disable_progressbar:
                availability = list(pool.map(_process_func, shapes.index))
            else:
                imap = pool.imap(_process_func, shapes.index)
                availability = list(progressbar(imap))

    availability = np.stack(availability)[:, ::-1]  # flip axis, see Notes
    coords = [(shapes.index), ('y', cutout.data.y), ('x', cutout.data.x)]
    return xr.DataArray(availability, coords=coords)
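# Hypothetical usage sketch for compute_availabilitymatrix(). File names, the
# CRS and the raster codes are placeholders; the ExclusionContainer API is
# assumed from atlite.gis as referenced in the docstring above.
def _availability_example():
    import atlite
    import geopandas as gpd
    from atlite.gis import ExclusionContainer

    cutout = atlite.Cutout('cutout.nc')  # an already prepared cutout file
    shapes = gpd.read_file('regions.geojson').set_index('name')
    excluder = ExclusionContainer(crs=3035)
    excluder.add_raster('landcover.tif', codes=[1, 2, 3])  # exclude these classes
    return compute_availabilitymatrix(cutout, shapes, excluder, nprocesses=2)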
print('\nHere is a sample lead (last row):')
print(json.dumps(unique_leads[grouper], indent=4))

print('\nAre you sure you want to continue? (y/n) ')
if input('') != 'y':
    sys.exit()

##############################################################################

api = CloseIO_API(args.api_key)

progress_widgets = [
    'Importing %d rows: ' % import_count, Percentage(), ' ', Bar(), ' ',
    ETA(), ' ', FileTransferSpeed(),
]
pbar = ProgressBar(widgets=progress_widgets, maxval=import_count).start()

dupes_cnt = 0

for key, val in unique_leads.items():
    retries = 5

    # check if it's a duplicate
    dupe = False
    if args.skip_duplicates and val.get('name'):
            'sort:date_created',  # sort by date_created so that the oldest lead is always merged into
        '_skip': offset,
        '_fields': 'id,display_name,contacts,status_label,opportunities'
    })
    leads = resp['data']

    leads_merged_this_page = 0
    duplicates_this_page = set()

    if first_iteration:
        total_leads = resp['total_results']
        progress_widgets = [
            'Analyzing %d Leads: ' % total_leads, Counter(), ' ',
            Percentage(), ' ', Bar(), ' ', AdaptiveETA(), ' ',
            FileTransferSpeed()
        ]
        pbar = ProgressBar(widgets=progress_widgets,
                           maxval=total_leads).start()
        pbar.update(offset)
        first_iteration = False

    for idx, lead in enumerate(leads):
        logger.debug("-------------------------------------------------")
        logger.debug("idx: %d, lead: %s (%s)", idx, lead['id'],
                     lead['display_name'])
        logger.debug("duplicates_this_page: %s", duplicates_this_page)

        # To avoid race conditions we skip over leads we've already seen
def migrate(srchost, srchostauth, srchostport, dsthost, dsthostauth,
            dsthostport, dsthostcacert, sslsrc, ssldst, db, flush):
    if srchost == dsthost:
        print('Source and destination must be different.')
        return

    source = redis.StrictRedis(host=srchost, port=int(srchostport), db=db,
                               password=srchostauth, ssl=sslsrc,
                               ssl_cert_reqs=None)
    dest = redis.StrictRedis(host=dsthost, port=int(dsthostport), db=db,
                             password=dsthostauth, ssl=ssldst,
                             ssl_ca_certs=dsthostcacert)

    if flush:
        dest.flushdb()

    size = source.dbsize()
    if size == 0:
        print('No keys found.')
        return

    progress_widgets = [
        '%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()
    ]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size
    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0
    while True:
        cursor, keys = source.scan(cursor, count=COUNT)
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            # Set TTL to 0 for RESTORE, since PTTL returns -1 when the key
            # exists but has no TTL assigned.
            if ttl == -1:
                ttl = 0
            if data is not None:
                pipeline.restore(key, ttl, data)
            else:
                non_existing += 1

        results = pipeline.execute(False)
        for key, result in zip(keys, results):
            if result != b'OK':
                e = result
                if hasattr(e, 'args') and (
                        e.args[0] == 'BUSYKEY Target key name already exists.'
                        or e.args[0] == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    print('Key failed:', key, 'data', result)
                    raise e

        if cursor == 0:
            break
        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()
    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
    elif lead['contacts'] not in unique_leads[grouper]['contacts']:
        unique_leads[grouper]['contacts'].extend(lead['contacts'])

print 'Found %d leads (grouped by company) from %d contacts.' % (
    len(unique_leads), import_count)

print '\nHere is a sample lead (last row):'
print json.dumps(unique_leads[grouper], indent=4)

if raw_input('\nAre you sure you want to continue? (y/n) ') != 'y':
    sys.exit()

##############################################################################

api = CloseIO_API(args.api_key, development=args.development)

progress_widgets = ['Importing %d rows: ' % import_count, Percentage(), ' ',
                    Bar(), ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=progress_widgets, maxval=import_count).start()

dupes_cnt = 0

for key, val in unique_leads.items():
    retries = 5

    # check if it's a duplicate
    dupe = False
    if args.skip_duplicates and val.get('name'):
        # get the org id necessary for search
        org_id = api.get('api_key')['data'][0]['organization_id']

        # get all the search results for given lead name
def migrate(srchost, dsthost, srccluster, dstcluster, db, flush):
    if srchost == dsthost:
        print 'Source and destination must be different.'
        return

    if srccluster:
        source_nodes = literal_eval(srchost)
        source = StrictRedisCluster(startup_nodes=source_nodes,
                                    decode_responses=True)
        logging.debug('source cluster info: %s', source.cluster_info())
    else:
        source = redis.Redis(srchost, db=db)

    if dstcluster:
        dest_nodes = literal_eval(dsthost)
        dest = StrictRedisCluster(startup_nodes=dest_nodes,
                                  decode_responses=True)
        logging.debug('dest cluster info: %s', dest.cluster_info())
    else:
        dest = redis.Redis(dsthost, db=db)

    if flush:
        dest.flushdb()

    if srccluster:
        representatives = {
            v['cluster_my_epoch']: k
            for k, v in source.cluster_info().items()
        }
        size = source.dbsize()
        size = sum(size[representative]
                   for representative in representatives.values())
    else:
        size = source.dbsize()

    if size == 0:
        print 'No keys found.'
        return

    progress_widgets = [
        '%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()
    ]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size
    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0

    if srccluster:
        counter = 0
        keys = []
        # iterate all the keys
        for key in source.scan_iter(count=COUNT):
            counter += 1
            keys.append(key)
            if counter % COUNT == 0:
                already_existing, non_existing = handle_keys(
                    source, dest, keys, already_existing, non_existing)
                cnt += len(keys)
                pbar.update(min(size, cnt))
                keys = []
        # handle the remaining
        if len(keys) > 0:
            already_existing, non_existing = handle_keys(
                source, dest, keys, already_existing, non_existing)
            cnt += len(keys)
            pbar.update(min(size, cnt))
    else:
        while True:
            cursor, keys = source.scan(cursor, count=COUNT)
            already_existing, non_existing = handle_keys(
                source, dest, keys, already_existing, non_existing)
            if cursor == 0:
                break
            cnt += len(keys)
            pbar.update(min(size, cnt))

    pbar.finish()
    print 'Keys disappeared on source during scan:', non_existing
    print 'Keys already existing on destination:', already_existing
def migrate(srchost, dsthost, srcport, dstport, db, flush_flag):
    if srchost == dsthost and srcport == dstport:
        print('Source and destination must be different.')
        return

    source = redis.Redis(srchost, port=srcport, db=db)
    dest = redis.Redis(dsthost, port=dstport, db=db)

    if flush_flag == 'Y':
        dest.flushdb()

    size = source.dbsize()
    if size == 0:
        print('No keys found.')
        return

    progress_widgets = [
        '%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()
    ]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size
    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0
    while True:
        cursor, keys = source.scan(cursor, count=COUNT)
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()
        print('#######################################')
        # print(result)
        print('#######################################')

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            if ttl is None:
                ttl = 0
            if data is not None:
                pipeline.restore(key, ttl, data)
            else:
                non_existing += 1

        results = pipeline.execute(False)
        print('#######################################')
        # pprint(result)
        print('#######################################')
        for key, result in zip(keys, results):
            if result != 'OK':
                e = result
                if hasattr(e, 'message') and (
                        e.message == 'BUSYKEY Target key name already exists.'
                        or e.message == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    print('Key failed:', key, repr(data), repr(result))
                    raise e

        if cursor == 0:
            break
        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()
    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
def migrate(srchost, dsthost, flush, monitor):
    if srchost == dsthost:
        print 'Source and destination must be different.'
        return

    print "Start monitor Process"
    q = Queue()
    monit_host = monitor.split(":")
    shost = srchost.split(":")
    dhost = dsthost.split(":")
    d = multiprocessing.Process(name='monitor', target=monit_playback,
                                args=(q, shost[0], int(shost[1]), dhost[0],
                                      int(dhost[1])))
    d.daemon = True
    d.start()

    source = redis.Redis(host=shost[0], port=int(shost[1]))
    sinfo = source.info()
    for i in sinfo.keys():
        if re.match(r"^db(\d)+", i):
            dbnum = int(re.compile(r'(\d+)').search(i).group(0))
            db = i
            print 'start scan db:', db
            source = redis.Redis(host=shost[0], port=int(shost[1]), db=dbnum)
            dest = redis.Redis(host=dhost[0], port=int(dhost[1]), db=dbnum)
            if flush:
                dest.flushdb()
            size = source.dbsize()
            if size == 0:
                print 'No keys found.'
                return
            progress_widgets = [
                '%s, %d keys: ' % (db, size), Percentage(), ' ', Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()
            COUNT = 2000  # scan size
            cnt = 0
            non_existing = 0
            already_existing = 0
            cursor = 0
            while True:
                cursor, keys = source.scan(cursor, count=COUNT)
                pipeline = source.pipeline()
                for key in keys:
                    pipeline.pttl(key)
                    pipeline.dump(key)
                result = pipeline.execute()
                pipeline = dest.pipeline()
                for key, ttl, data in zip(keys, result[::2], result[1::2]):
                    if ttl is None:
                        ttl = 0
                    if data is not None:
                        pipeline.restore(key, ttl, data)
                    else:
                        non_existing += 1
                results = pipeline.execute(False)
                for key, result in zip(keys, results):
                    if result != 'OK':
                        e = result
                        if hasattr(e, 'message') and (
                                e.message ==
                                'BUSYKEY Target key name already exists.'
                                or e.message == 'Target key name is busy.'):
                            already_existing += 1
                        else:
                            print 'Key failed:', key, `data`, `result`
                            raise e
                if cursor == 0:
                    break
                cnt += len(keys)
                pbar.update(min(size, cnt))
            pbar.finish()
            print 'Keys disappeared on source during scan:', non_existing
            print 'Keys already existing on destination:', already_existing

    while True:
        if not q.empty():
            print q.get()
        else:
            break
    d.join()
total_leads_merged = 0
first_iteration = True

while has_more:
    resp = api.get('lead', data={
        'query': 'sort:date_created',  # sort by date_created so that the oldest lead is always merged into
        '_skip': offset,
        '_fields': 'id,display_name,name,contacts,status_label,opportunities'
    })
    leads = resp['data']

    leads_merged_this_page = 0
    duplicates_this_page = set()

    if first_iteration:
        total_leads = resp['total_results']
        progress_widgets = ['Analyzing %d Leads: ' % total_leads, Counter(),
                            Percentage(), ' ', Bar(), ' ', AdaptiveETA(), ' ',
                            FileTransferSpeed()]
        pbar = ProgressBar(widgets=progress_widgets,
                           maxval=total_leads).start()
        pbar.update(offset)
        first_iteration = False

    for idx, lead in enumerate(leads):
        logger.debug("-------------------------------------------------")
        logger.debug("idx: %d, lead: %s (%s)", idx, lead['id'],
                     lead['display_name'])
        logger.debug("duplicates_this_page: %s", duplicates_this_page)

        # To avoid race conditions we skip over leads we've already seen
        # in our duplicates lists (see README at top of file)
        if lead['id'] in duplicates_this_page:
            logger.debug("skipping lead %s", lead['id'])
            continue