# NOTE(review): fragment — this chunk begins mid-conditional; the matching
# `if` branch (and the enclosing loop over `lead` rows) is not visible here.
elif lead['contacts'] not in unique_leads[grouper]['contacts']:
    # Merge this row's contacts into the lead already grouped under `grouper`.
    unique_leads[grouper]['contacts'].extend(lead['contacts'])

# Summarize the grouping and show the last grouped lead as a sample.
print 'Found %d leads (grouped by company) from %d contacts.' % (len(unique_leads), import_count)
print '\nHere is a sample lead (last row):'
print json.dumps(unique_leads[grouper], indent=4)

# Interactive confirmation before any data is written through the API.
if raw_input('\nAre you sure you want to continue? (y/n) ') != 'y':
    sys.exit()

##############################################################################

api = CloseIO_API(args.api_key, development=args.development)

# Progress bar covering one tick per imported row.
progress_widgets = ['Importing %d rows: ' % import_count, Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=progress_widgets, maxval=import_count).start()

dupes_cnt = 0

for key, val in unique_leads.items():
    retries = 5

    # check if it's a duplicate
    dupe = False
    if args.skip_duplicates and val.get('name'):
        # get the org id necessary for search
        org_id = api.get('api_key')['data'][0]['organization_id']

        # get all the search results for given lead name
        # NOTE(review): chunk is truncated here — the search call itself is in
        # the unseen remainder of the file.
def migrate(srchost, dsthost, flush, monitor):
    """Copy every populated db of one Redis server to another via DUMP/RESTORE.

    srchost / dsthost / monitor are "host:port" strings; srchost and dsthost
    must differ.  If `flush` is truthy each destination db is flushed before
    keys are restored.  A daemon process running `monit_playback` (defined
    elsewhere in this file — not visible here) feeds progress messages into a
    Queue that is drained and printed at the end.
    """
    if srchost == dsthost:
        print 'Source and destination must be different.'
        return

    print "Start monitor Process"
    q = Queue()
    monit_host = monitor.split(":")
    shost = srchost.split(":")
    dhost = dsthost.split(":")
    d = multiprocessing.Process(name='monitor', target=monit_playback,
                                args=(q, shost[0], int(shost[1]), dhost[0], int(dhost[1])))
    d.daemon = True
    d.start()

    # Probe INFO for db0/db1/... entries to discover which dbs hold keys.
    source = redis.Redis(host=shost[0], port=int(shost[1]))
    sinfo = source.info()
    for i in sinfo.keys():
        if re.match("^db(\d)+", i):
            # Extract the numeric db index from the "dbN" info key.
            dbnum = int(re.compile(r'(\d+)').search(i).group(0))
            db = i
            print 'start scan db:', db
            source = redis.Redis(host=shost[0], port=int(shost[1]), db=dbnum)
            dest = redis.Redis(host=dhost[0], port=int(dhost[1]), db=dbnum)
            if flush:
                dest.flushdb()

            size = source.dbsize()
            if size == 0:
                # NOTE(review): this returns from the whole function, skipping
                # any remaining dbs — confirm that is intended.
                print 'No keys found.'
                return

            progress_widgets = ['%s, %d keys: ' % (db, size), Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

            COUNT = 2000  # scan size

            cnt = 0
            non_existing = 0
            already_existing = 0
            cursor = 0

            while True:
                cursor, keys = source.scan(cursor, count=COUNT)

                # One round trip: PTTL + DUMP for every key in this batch.
                pipeline = source.pipeline()
                for key in keys:
                    pipeline.pttl(key)
                    pipeline.dump(key)
                result = pipeline.execute()

                pipeline = dest.pipeline()
                for key, ttl, data in zip(keys, result[::2], result[1::2]):
                    # RESTORE needs ttl=0 for "no expiry"; presumably the
                    # redis-py version in use returns None from PTTL for keys
                    # without a TTL — verify against the library version.
                    if ttl is None:
                        ttl = 0
                    if data != None:
                        pipeline.restore(key, ttl, data)
                    else:
                        # Key disappeared between SCAN and DUMP.
                        non_existing += 1

                # raise_on_error=False: errors come back as exception objects.
                results = pipeline.execute(False)
                for key, result in zip(keys, results):
                    if result != 'OK':
                        e = result
                        # BUSYKEY means the key already exists on the target.
                        if hasattr(e, 'message') and (
                                e.message == 'BUSYKEY Target key name already exists.'
                                or e.message == 'Target key name is busy.'):
                            already_existing += 1
                        else:
                            print 'Key failed:', key, ` data `, ` result `
                            raise e

                if cursor == 0:
                    break

                cnt += len(keys)
                pbar.update(min(size, cnt))

            pbar.finish()

            print 'Keys disappeared on source during scan:', non_existing
            print 'Keys already existing on destination:', already_existing

    # Drain whatever the monitor process has queued, then wait for it.
    while True:
        if not q.empty():
            print q.get()
        else:
            break
    d.join()
# NOTE(review): fragment of a larger lead-import script; `unique_leads`,
# `grouper`, `import_count` and `args` are defined in unseen earlier code.
print('\nHere is a sample lead (last row):')
print(json.dumps(unique_leads[grouper], indent=4))

# Interactive confirmation before any data is written through the API.
print('\nAre you sure you want to continue? (y/n) ')
if raw_input('') != 'y':
    sys.exit()

##############################################################################

api = CloseIO_API(args.api_key, development=args.development)

# Progress bar covering one tick per imported row.
progress_widgets = ['Importing %d rows: ' % import_count, Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=progress_widgets, maxval=import_count).start()

dupes_cnt = 0

for key, val in unique_leads.items():
    retries = 5

    # check if it's a duplicate
    dupe = False
    if args.skip_duplicates and val.get('name'):
        # get the org id necessary for search
        org_id = api.get('api_key')['data'][0]['organization_id']
        # NOTE(review): chunk is truncated here — the duplicate search follows
        # in the unseen remainder of the file.
def migrate(srchost, srchostauth, srchostport, dsthost, dsthostauth, dsthostport, dsthostcacert, sslsrc, ssldst, db, flush):
    """Copy one Redis db to another server via DUMP/RESTORE, with auth/SSL.

    Parameters
    ----------
    srchost / dsthost : host names; must be different.
    srchostauth / dsthostauth : passwords (None for no auth).
    srchostport / dsthostport : ports (converted with int()).
    dsthostcacert : CA certificate file used to verify the destination.
    sslsrc / ssldst : enable SSL for the source / destination connection.
    db : database index to copy on both sides.
    flush : flush the destination db before restoring.

    Prints a progress bar plus counts of keys that vanished from the source
    during the scan and keys that already existed on the destination.
    """
    if srchost == dsthost:
        print('Source and destination must be different.')
        return

    # NOTE: the source connection disables certificate verification
    # (ssl_cert_reqs=None); only the destination is verified against a CA.
    source = redis.StrictRedis(host=srchost, port=int(srchostport), db=db,
                               password=srchostauth, ssl=sslsrc,
                               ssl_cert_reqs=None)
    dest = redis.StrictRedis(host=dsthost, port=int(dsthostport), db=db,
                             password=dsthostauth, ssl=ssldst,
                             ssl_ca_certs=dsthostcacert)
    if flush:
        dest.flushdb()

    size = source.dbsize()
    if size == 0:
        print('No keys found.')
        return

    progress_widgets = ['%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size

    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0

    while True:
        cursor, keys = source.scan(cursor, count=COUNT)

        # One round trip: PTTL + DUMP for every key in this scan batch.
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            # Sets TTL to 0 according to the library requirements. Since TTL in
            # Redis will give -1 if key exists but no TTL is assigned.
            if ttl == -1:
                ttl = 0
            if data is not None:
                pipeline.restore(key, ttl, data)
            else:
                # Key disappeared between SCAN and DUMP.
                non_existing += 1

        # With raise_on_error=False failed commands come back as exception
        # objects instead of aborting the whole pipeline.
        results = pipeline.execute(raise_on_error=False)
        for key, result in zip(keys, results):
            if result != b'OK':
                e = result
                # BUSYKEY means the key already exists on the destination.
                if hasattr(e, 'args') and (
                        e.args[0] == 'BUSYKEY Target key name already exists.'
                        or e.args[0] == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    print('Key failed:', key, 'data', result)
                    raise e

        if cursor == 0:
            break

        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()

    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
def migrate(srchost, dsthost, srcport, dstport, db, flush_flag):
    """Copy all keys of one Redis db to another server via DUMP/RESTORE.

    Parameters
    ----------
    srchost / dsthost : host names (same host allowed only if ports differ).
    srcport / dstport : ports for each server.
    db : database index to copy on both sides.
    flush_flag : 'Y' flushes the destination db before restoring.

    Prints a progress bar plus counts of keys that vanished from the source
    during the scan and keys that already existed on the destination.
    """
    if (srchost == dsthost and srcport == dstport):
        print('Source and destination must be different.')
        return

    source = redis.Redis(srchost, port=srcport, db=db)
    dest = redis.Redis(dsthost, port=dstport, db=db)
    if (flush_flag == 'Y'):
        dest.flushdb()

    size = source.dbsize()
    if (size == 0):
        print('No keys found.')
        return

    progress_widgets = ['%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size

    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0

    while True:
        cursor, keys = source.scan(cursor, count=COUNT)

        # One round trip: PTTL + DUMP for every key in this scan batch.
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            # RESTORE needs ttl=0 for "no expiry".  PTTL reports -1 for a key
            # without an expiry (very old redis-py versions returned None).
            if ttl is None or ttl == -1:
                ttl = 0
            if data is not None:
                pipeline.restore(key, ttl, data)
            else:
                # Key disappeared between SCAN and DUMP.
                non_existing += 1

        # With raise_on_error=False failed commands come back as exception
        # objects instead of aborting the whole pipeline.
        results = pipeline.execute(raise_on_error=False)
        for key, result in zip(keys, results):
            # On Python 3 redis-py replies are bytes: a successful RESTORE
            # yields b'OK', so compare against bytes, not 'OK'.
            if result != b'OK':
                e = result
                # Python 3 exceptions have no .message; use args[0].
                # BUSYKEY means the key already exists on the destination.
                if hasattr(e, 'args') and (
                        e.args[0] == 'BUSYKEY Target key name already exists.'
                        or e.args[0] == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    # NOTE: `data` is the last payload from the dump loop, not
                    # necessarily this key's — kept for parity with siblings.
                    print('Key failed:', key, repr(data), repr(result))
                    raise e

        if cursor == 0:
            break

        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()

    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
def migrate(srchost, dsthost, srccluster, dstcluster, db, flush):
    """Copy keys from one Redis (or Redis Cluster) to another.

    srchost / dsthost are either plain host names or, when the corresponding
    *cluster flag is set, a Python-literal list of startup-node dicts parsed
    with literal_eval.  `db` applies to the non-cluster connections only.
    Batches of keys are handed to `handle_keys` (defined elsewhere in this
    file — not visible here), which returns updated duplicate/missing counts.
    """
    if srchost == dsthost:
        print 'Source and destination must be different.'
        return

    if srccluster:
        source_nodes = literal_eval(srchost)
        source = StrictRedisCluster(startup_nodes=source_nodes, decode_responses=True)
        logging.debug('source cluster info: %s', source.cluster_info())
    else:
        source = redis.Redis(srchost, db=db)
    if dstcluster:
        dest_nodes = literal_eval(dsthost)
        dest = StrictRedisCluster(startup_nodes=dest_nodes, decode_responses=True)
        logging.debug('dest cluster info: %s', dest.cluster_info())
    else:
        dest = redis.Redis(dsthost, db=db)
    if flush:
        dest.flushdb()

    if srccluster:
        # Pick one node per cluster epoch so each shard's dbsize is counted
        # exactly once.  (NOTE(review): "reprensentative" is a typo, but it is
        # used consistently so the code works.)
        representatives = {
            v['cluster_my_epoch']: k
            for k, v in source.cluster_info().items()
        }
        size = source.dbsize()
        size = sum(size[reprensentative] for reprensentative in representatives.values())
    else:
        size = source.dbsize()

    if size == 0:
        print 'No keys found.'
        return

    progress_widgets = ['%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size

    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0

    if srccluster:
        # Cluster SCAN has no single cursor; scan_iter and batch manually.
        counter = 0
        keys = []
        # iterate all the keys
        for key in source.scan_iter(count=COUNT):
            counter += 1
            keys.append(key)
            if counter % COUNT == 0:
                already_existing, non_existing = handle_keys(
                    source, dest, keys, already_existing, non_existing)
                cnt += len(keys)
                pbar.update(min(size, cnt))
                keys = []
        # handle the remaining
        if len(keys) > 0:
            already_existing, non_existing = handle_keys(
                source, dest, keys, already_existing, non_existing)
            cnt += len(keys)
            pbar.update(min(size, cnt))
    else:
        # Plain cursor-based SCAN until the cursor wraps to 0.
        while True:
            cursor, keys = source.scan(cursor, count=COUNT)
            already_existing, non_existing = handle_keys(
                source, dest, keys, already_existing, non_existing)
            if cursor == 0:
                break
            cnt += len(keys)
            pbar.update(min(size, cnt))

    pbar.finish()

    print 'Keys disappeared on source during scan:', non_existing
    print 'Keys already existing on destination:', already_existing
def migrate(srchost, srchostauth, srchostport, dsthost, dsthostauth, dsthostport, srchostcacert, dsthostcacert, sslsrc, ssldst, db, flush, dryrun):
    """Copy one Redis db to another server via DUMP/RESTORE.

    Supports password auth and SSL (with CA certificates) on both ends.  With
    dryrun=True nothing is written: destination keys are only probed with
    TYPE so the summary statistics can still be reported.

    Parameters
    ----------
    srchost / dsthost : host names; must be different.
    srchostauth / dsthostauth : passwords (None for no auth).
    srchostport / dsthostport : ports (converted with int()).
    srchostcacert / dsthostcacert : CA certificate files for verification.
    sslsrc / ssldst : enable SSL for the source / destination connection.
    db : database index to copy on both sides.
    flush : flush the destination db before restoring (skipped on dryrun).
    dryrun : report what would happen without restoring anything.
    """
    if srchost == dsthost:
        print('Source and destination must be different.')
        return

    source = redis.StrictRedis(host=srchost, port=int(srchostport), db=db,
                               password=srchostauth, ssl=sslsrc,
                               ssl_ca_certs=srchostcacert)
    dest = redis.StrictRedis(host=dsthost, port=int(dsthostport), db=db,
                             password=dsthostauth, ssl=ssldst,
                             ssl_ca_certs=dsthostcacert)
    if flush and not dryrun:
        dest.flushdb()

    size = source.dbsize()
    if size == 0:
        print('No keys found.')
        return

    progress_widgets = ['%d keys: ' % size, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=progress_widgets, maxval=size).start()

    COUNT = 2000  # scan size

    cnt = 0
    non_existing = 0
    already_existing = 0
    cursor = 0

    while True:
        cursor, keys = source.scan(cursor, count=COUNT)

        # One round trip: PTTL + DUMP for every key in this scan batch.
        pipeline = source.pipeline()
        for key in keys:
            pipeline.pttl(key)
            pipeline.dump(key)
        result = pipeline.execute()

        pipeline = dest.pipeline()
        for key, ttl, data in zip(keys, result[::2], result[1::2]):
            # Sets TTL to 0 according to the library requirements. Since TTL in
            # Redis will give -1 if key exists but no TTL is assigned.
            if ttl == -1:
                ttl = 0
            if data is not None:
                if dryrun:
                    # Execute generic operation to determine whether key exists.
                    # This is used in place of 'restore' to help generate the
                    # 'already_existing' summary statistic.
                    pipeline.type(key)
                else:
                    pipeline.restore(key, ttl, data)
            else:
                # Key disappeared between SCAN and DUMP.
                non_existing += 1

        # With raise_on_error=False failed commands come back as exception
        # objects instead of aborting the whole pipeline.
        results = pipeline.execute(raise_on_error=False)
        for key, result in zip(keys, results):
            if dryrun:
                # If 'type' returned something other than 'none', we know the
                # key exists.  If 'flush' is enabled, pretend the key doesn't
                # exist to avoid a non-zero 'already_existing' count on a
                # dryrun flush.
                if not flush and result != b'none':
                    already_existing += 1
            elif result != b'OK':
                e = result
                # BUSYKEY means the key already exists on the destination.
                if hasattr(e, 'args') and (
                        e.args[0] == 'BUSYKEY Target key name already exists.'
                        or e.args[0] == 'Target key name is busy.'):
                    already_existing += 1
                else:
                    print('Key failed:', key, 'data', result)
                    raise e

        if cursor == 0:
            break

        cnt += len(keys)
        pbar.update(min(size, cnt))

    pbar.finish()

    print('Keys disappeared on source during scan:', non_existing)
    print('Keys already existing on destination:', already_existing)
# NOTE(review): fragment of a larger lead-import script; `unique_leads`,
# `import_count` and `args` are defined in unseen earlier code.
# Interactive confirmation before any data is written through the API.
print('\nAre you sure you want to continue? (y/n) ')
if input('') != 'y':
    sys.exit()

##############################################################################

api = CloseIO_API(args.api_key)

# Progress bar covering one tick per imported row.
progress_widgets = [
    'Importing %d rows: ' % import_count,
    Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed(),
]
pbar = ProgressBar(widgets=progress_widgets, maxval=import_count).start()

dupes_cnt = 0

for key, val in unique_leads.items():
    retries = 5

    # check if it's a duplicate
    dupe = False
    if args.skip_duplicates and val.get('name'):
        # get the org id necessary for search
        # NOTE(review): chunk is truncated here — the org-id lookup and the
        # duplicate search follow in the unseen remainder of the file.
def compute_availabilitymatrix(cutout, shapes, excluder, nprocesses=None,
                               disable_progressbar=False):
    """
    Compute the eligible share within cutout cells in the overlap with shapes.

    For parallel calculation (nprocesses not None) the excluder must not be
    initialized and all raster references must be strings. Otherwise
    processes are colliding when reading from one common
    rasterio.DatasetReader.

    Parameters
    ----------
    cutout : atlite.Cutout
        Cutout which the availability matrix is aligned to.
    shapes : geopandas.Series/geopandas.DataFrame
        Geometries for which the availabilities are calculated.
    excluder : atlite.gis.ExclusionContainer
        Container of all meta data or objects which to exclude, i.e.
        rasters and geometries.
    nprocesses : int, optional
        Number of processes to use for calculating the matrix. The
        parallelization can heavily boost the calculation speed. The default
        is None.
    disable_progressbar: bool, optional
        Disable the progressbar if nprocesses is not None. Then the `map`
        function instead of the `imap` function is used for the
        multiprocessing pool. This speeds up the calculation.

    Returns
    -------
    availabilities : xr.DataArray
        DataArray of shape (|shapes|, |y|, |x|) containing all the eligible
        share of cutout cell (x,y) in the overlap with shape i.

    Notes
    -----
    The rasterio (or GDAL) average downsampling returns different results
    dependent on how the target raster (the cutout raster) is spanned.
    Either it is spanned from the top left going downwards, e.g.
    Affine(0.25, 0, 0, 0, -0.25, 50), or starting in the lower left corner
    and going up, e.g. Affine(0.25, 0, 0, 0, 0.25, 50). Here we stick to the
    top down version which is why we use `cutout.transform_r` and flipping
    the y-axis in the end.
    """
    availability = []
    shapes = shapes.geometry if isinstance(shapes, gpd.GeoDataFrame) else shapes
    shapes = shapes.to_crs(excluder.crs)

    progress = SimpleProgress(format='(%s)' % SimpleProgress.DEFAULT_FORMAT)
    widgets = [Percentage(), ' ', progress, ' ', Bar(), ' ', Timer(), ' ', ETA()]
    # Prefix typo fixed: "availabily" -> "availability".
    progressbar = ProgressBar(prefix='Compute availability matrix: ',
                              widgets=widgets, max_value=len(shapes))

    args = (excluder, cutout.transform_r, cutout.crs, cutout.shape)
    if nprocesses is None:
        # Sequential path: rasterize one shape at a time in this process.
        for i in progressbar(shapes.index):
            _ = shape_availability_reprojected(shapes.loc[[i]], *args)[0]
            availability.append(_)
    else:
        # Parallel path: each worker re-opens its own raster handles, so all
        # of the excluder's rasters must be closed before forking.
        assert excluder.all_closed, ('For parallelization all raster files '
                                     'in excluder must be closed')
        kwargs = {'initializer': _init_process,
                  'initargs': (shapes, *args),
                  'maxtasksperchild': 20,
                  'processes': nprocesses}
        with mp.Pool(**kwargs) as pool:
            if disable_progressbar:
                availability = list(pool.map(_process_func, shapes.index))
            else:
                imap = pool.imap(_process_func, shapes.index)
                availability = list(progressbar(imap))

    availability = np.stack(availability)[:, ::-1]  # flip axis, see Notes
    coords = [(shapes.index), ('y', cutout.data.y), ('x', cutout.data.x)]
    return xr.DataArray(availability, coords=coords)