import re

from pymongo import MongoClient
from progressbar import Bar, Percentage, ProgressBar

# Connect to MongoDB
connection = MongoClient("mongodb://localhost")
db = connection.bundestagswahl.tweets

# Set maximum tweets
maxtweets = 5000

# Temporary dictionaries
original_tweets = {'documents': []}
tidied_tweets = {'documents': []}

# Use a progress bar in your shell (useful for larger data sets)
pbar1 = ProgressBar(widgets=['Analyze Tweets: ', Percentage(), Bar()],
                    maxval=maxtweets).start()
pbar2 = ProgressBar(widgets=['Write Excel File: ', Percentage(), Bar()],
                    maxval=maxtweets).start()


def tidy_tweet(tweet):
    '''Helper function to tidy the tweet text. The regex removes mentions,
    special characters, links, etc.'''
    return ' '.join(
        re.sub(
            r"(@[A-Za-z0-9äöüÄÖÜß]+)|([^0-9A-Za-zäöüÄÖÜß \t])|(\w+:\/\/\S+)",
            " ", tweet).split())
def NodeDic(results, edge_info, node_info):
    '''
    Takes the results of running a query, NETS edge label information, and a
    list of node information (list[0] contains the NETS nodes label triples,
    list[1] contains the NETS nodes identifier triples). The function returns
    a list of dictionaries: list[0] contains a nested dictionary where keys
    are bio entity identifiers and the values are the human-readable labels
    and database identifiers; list[1] contains a dictionary where the bio
    node is the key and the value is a set of possible NETS node types for
    that node.
    :param results: json file containing the query results from endpoint
    :param edge_info: dictionary where the keys are the NETS edges and the
        values are the edge labels
    :param node_info: a list of node information (list[0] contains the NETS
        nodes label triples, list[1] contains the NETS nodes identifier
        triples)
    :return: a list of dictionaries as described above
    '''
    print('Start building OWL-NETs metadata dictionary')

    # creates a map to store NETS node type information
    node_type = {}

    # creates a map to identify which query variables represent the BIO
    # world ID, label, and ICE ID
    node_labeler = {}

    # assign variables needed for node dictionary
    NETS = set([x.strip('?') for y in edge_info[0].keys() for x in y])
    labels = [[re.sub('[?|"\n"]', '', x.split(' ')[0]),
               re.sub('[?|"\n"]', '', x.split(' ')[2])]
              for x in node_info[0]]
    ids = [[x.split(' ')[0].strip('?'), x.split(' ')[2].strip('?')]
           for x in node_info[1]]

    # initialize progress bar widgets
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(NETS))

    for node in pbar(NETS):
        node_labeler[node] = {}

        for res in results['results']['bindings']:
            node_key = str(res[node]['value'])
            label_value = [x[1] for x in labels if x[0] == node][0]
            id_value = [x[0] for x in ids if x[1] == node][0]

            # NODE TYPE: setting node type information
            if node_key in node_type.keys():
                node_type[node_key].add(node)
            else:
                node_type[node_key] = set()
                node_type[node_key].add(node)

            # NODE METADATA: setting node attributes by NETS node type
            if node_key in node_labeler[node].keys():
                # order matters - not using a set so that each ICE can be
                # mapped to the label with the same index
                node_labeler[node][node_key]['label'].append(
                    res[label_value]['value'])
                node_labeler[node][node_key]['id'].append(
                    res[id_value]['value'])
            else:
                node_labeler[node][node_key] = {}
                node_labeler[node][node_key]['label'] = [
                    res[label_value]['value']
                ]
                node_labeler[node][node_key]['id'] = [
                    res[id_value]['value']
                ]

    # close progress bar
    pbar.finish()

    print('Finished building OWL-NETs metadata dictionary\n')

    # CHECK: verify that the counts are correct
    for node in NETS:
        res_count = set()

        for res in results['results']['bindings']:
            res_count.add(res[node]['value'])

        # verify the number of nodes in graph is correct
        if len(node_labeler[node].keys()) != len(res_count):
            raise ValueError('The count of results for the ' + str(node) +
                             ' NETS node in the node dictionary differs '
                             'from the query output')

    return node_labeler, node_type
truth_state = numpy.zeros((3, 1))  # initial ground-truth state (used below)
deadr_state = numpy.zeros((3, 1))
control = numpy.zeros((1, 2))
observation = numpy.zeros((len(RFID), 1))
time = 0.0

truth_trajectory = []
deadr_trajectory = []
est_trajectory = []

fig = plt.figure(facecolor="w")
#ax_arrow = fig.add_subplot(111, aspect='equal')
ax_circle = fig.add_subplot(111, aspect='equal')
ax_trajectory = fig.add_subplot(111, aspect='equal')

num_of_loop = int(ENDTIME / DELTATIME)
pbar = ProgressBar(widgets=[Percentage(), Bar()], max_value=num_of_loop)

# Main loop
for i in range(num_of_loop):
    # Calculate ground truth
    time = time + DELTATIME
    control = control_model(time)
    truth_state = process_model(x=truth_state, u=control,
                                delta_time=DELTATIME)

    # Calculate dead reckoning
    deadr_state = process_model(
        x=deadr_state,
        u=(control + simulation_process_cov.dot(numpy.random.randn(2, 1))),
        delta_time=DELTATIME)
def get_progress_bar(maxval): widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()] return ProgressBar(widgets=widgets, maxval=maxval)
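# A minimal usage sketch for the helper above; the file list and sleep are
# illustrative placeholders, not part of the original code.
import time

files = ['a.bin', 'b.bin', 'c.bin']  # hypothetical inputs
pbar = get_progress_bar(len(files)).start()
for i, name in enumerate(files):
    time.sleep(0.1)  # stands in for the real per-file work
    pbar.update(i + 1)
pbar.finish()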
im = imread(measurementFiles[0])
#im = wiener(im)
dim = shape(im)
densityVals = -log(im[20:dim[0] - 20:1, 20:dim[1] - 20:1, :])
dim = shape(densityVals)

pixelCalcWidth = 20
# use integer division so the array shapes and loop bounds are ints
T = zeros([dim[0] // pixelCalcWidth, dim[1] // pixelCalcWidth])
D = zeros([dim[0] // pixelCalcWidth, dim[1] // pixelCalcWidth])
w = zeros([pixelCalcWidth, pixelCalcWidth, 3])
OD = zeros([pixelCalcWidth, pixelCalcWidth, 3])

widgets = ['Dose calc: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
           ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=widgets,
                   maxval=dim[0] // pixelCalcWidth).start()
for i in range(dim[0] // pixelCalcWidth):
    idist = i * pixelCalcWidth
    for j in range(dim[1] // pixelCalcWidth):
        jdist = j * pixelCalcWidth
        OD = densityVals[idist:idist + pixelCalcWidth,
                         jdist:jdist + pixelCalcWidth, :]
        w[:, :, 0] = 1 / density2DoseSigma(OD[:, :, 0], red, redSig)
        w[:, :, 1] = 1 / density2DoseSigma(OD[:, :, 1], green, greenSig)
        w[:, :, 2] = 1 / density2DoseSigma(OD[:, :, 2], blue, blueSig)
def noisify(meta_filename, noise_filename, noise_name, noise_percent): is_noise, data_noise, sr_noise_orig = check_noise(noise_filename) if is_noise: sr_noise = sr_noise_orig source_dir = os.path.dirname(meta_filename) wavs_dir = os.path.join(source_dir, 'wavs') out_dir = os.path.join(source_dir, 'wavs', noise_name) meta_outname = os.path.join(source_dir, 'metadata-{}.csv'.format(noise_name)) meta_data = codecs.open(meta_filename, 'r', 'utf-8').readlines() num_entries = len(meta_data) if not os.path.exists(out_dir): os.makedirs(out_dir) widgets=[FormatLabel('File: %(message)s [Iter: %(value)s/'+str(num_entries)+']'), ' ', Percentage(), ' ', Bar(marker='@', left='[', right=']'), ' ', ETA()] pBar = ProgressBar(widgets=widgets, maxval=num_entries).start() out_meta = [] for i, line in enumerate(meta_data): filename, orig_text, clean_text = line.strip().split('|') pBar.update(i, filename) infile = os.path.join(wavs_dir, filename + '.wav') outfile = os.path.join(out_dir, filename + '.wav') data_audio, sr_audio = sf.read(infile, dtype='int16') if sr_audio != sr_noise: noise_data = resample_noise_file(sr_audio, sr_noise, data_noise) sr_noise = sr_audio create_noise(outfile, infile, data_audio, noise_data, sr_audio, noise_percent) out_meta.append(u'{}/{}|{}|{}'.format(noise_name, filename, orig_text, clean_text)) pBar.finish() print('Saving new metadata... ', end='') sys.stdout.flush() outf = codecs.open(meta_outname, 'w', 'utf-8') for l in out_meta: print(l, file=outf) outf.close() print('done') print('Added meta-file: {}'.format(meta_outname)) else: print('Invalid Noise data. Exiting!')
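# A hedged invocation sketch for noisify(); every path and setting below is
# an illustrative assumption, not a value from the original project.
if __name__ == '__main__':
    noisify(meta_filename='/data/speech/metadata.csv',  # hypothetical corpus
            noise_filename='/data/noise/cafe.wav',      # hypothetical noise
            noise_name='cafe20',
            noise_percent=0.2)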
def main(source=None, num_epochs=None, method=None, batch_size=None,
         learning_rate=None, beta=None, image_dir=None, binary_dir=None,
         dim_z=None, prior=None):
    # DATA
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(
        source=source)
    train_samples = X_train.shape[0]

    # VAR
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')
    log_Z = theano.shared(lasagne.utils.floatX(0.), name='log_Z')

    # MODEL
    logger.info('Building model and graph')
    generator = build_generator(noise_var, dim_z=dim_z)
    discriminator = build_discriminator(input_var)

    # RNG
    trng = RandomStreams(random.randint(1, 1000000))

    # GRAPH / LOSS
    # n_samples is assumed to be defined at module level in the original
    # script; it is not an argument of this function.
    g_output_logit = lasagne.layers.get_output(generator)
    generator_loss, discriminator_loss, D_r, D_f, log_Z_est, log_w, \
        w_tilde, d = BGAN(discriminator, g_output_logit, n_samples, trng)

    # OPTIMIZER
    generator_params = lasagne.layers.get_all_params(generator,
                                                     trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)

    eta = theano.shared(lasagne.utils.floatX(learning_rate))

    updates = lasagne.updates.adam(generator_loss, generator_params,
                                   learning_rate=eta, beta1=beta)
    updates.update(
        lasagne.updates.adam(discriminator_loss, discriminator_params,
                             learning_rate=eta, beta1=beta))
    updates.update([(log_Z, 0.95 * log_Z + 0.05 * log_Z_est.mean())])

    # COMPILE
    # D_r and D_f are the discriminator outputs on real and fake inputs
    # returned by BGAN above.
    results = {
        'p(real)': (T.nnet.sigmoid(D_r) > .5).mean(),
        'p(fake)': (T.nnet.sigmoid(D_f) < .5).mean(),
        'G loss': generator_loss,
        'D loss': discriminator_loss,
        'log Z': log_Z,
        'log Z est': log_Z_est.mean(),
        'log_Z est var': log_Z_est.std()**2,
        'log w': log_w.mean(),
        'log w var': log_w.std()**2,
        'norm w': w_tilde.mean(),
        'norm w var': w_tilde.std()**2,
        'ESS': (1. / (w_tilde**2).sum(0)).mean()
    }
    train_fn = theano.function([noise_var, input_var], results,
                               updates=updates)

    gen_fn = theano.function(
        [noise_var],
        lasagne.layers.get_output(generator, deterministic=True))

    # TRAIN
    logger.info('Training...')

    results = {}
    for epoch in range(num_epochs):
        u = 0
        prefix = '{}_{}'.format(method, epoch)

        e_results = {}
        widgets = ['Epoch {}, '.format(epoch), Timer(), Bar()]
        pbar = ProgressBar(widgets=widgets,
                           maxval=(train_samples // batch_size)).start()
        prefix = str(epoch)

        start_time = time.time()
        batch0 = None
        for batch in iterate_minibatches(X_train, y_train, batch_size,
                                         shuffle=True):
            inputs, targets = batch
            if batch0 is None:
                batch0 = inputs

            if prior == 'uniform':
                noise = floatX(np.random.rand(len(inputs), dim_z))
            elif prior == 'gaussian':
                noise = floatX(np.random.normal(size=(len(inputs), dim_z)))
            outs = train_fn(noise, inputs)
            outs = dict((k, np.asarray(v)) for k, v in outs.items())

            update_dict_of_lists(e_results, **outs)
            u += 1
            pbar.update(u)

        update_dict_of_lists(results, **e_results)
        np.savez(path.join(binary_dir, '{}_results.npz'.format(prefix)),
                 **results)

        try:
            if prior == 'uniform':
                noise = floatX(np.random.rand(100, dim_z))
            elif prior == 'gaussian':
                noise = floatX(np.random.normal(size=(64, dim_z)))
            samples = gen_fn(noise)
            summarize(results, samples, image_dir=image_dir, prefix=prefix)
        except Exception as e:
            print(e)

        logger.info('Epoch {} of {} took {:.3f}s'.format(
            epoch + 1, num_epochs, time.time() - start_time))

        np.savez(
            path.join(binary_dir, '{}_generator_params.npz'.format(prefix)),
            *lasagne.layers.get_all_param_values(generator))
        np.savez(
            path.join(binary_dir,
                      '{}_discriminator_params.npz'.format(prefix)),
            *lasagne.layers.get_all_param_values(discriminator))
def setup_time_indices(fn_pattern, xtimeName): # {{{ """ This function finds a list of NetCDF files containing time-dependent MPAS data and extracts the time indices in each file. The routine insures that each time is unique. """ # Build file list and time indices if ';' in fn_pattern: file_list = [] for pattern in fn_pattern.split(';'): file_list.extend(glob.glob(pattern)) else: file_list = glob.glob(fn_pattern) file_list.sort() local_indices = [] file_names = [] all_times = [] if len(file_list) == 0: print("No files to process.") print("Exiting...") sys.exit(0) if use_progress_bar: widgets = [ 'Build time indices: ', Percentage(), ' ', Bar(), ' ', ETA() ] time_bar = ProgressBar(widgets=widgets, maxval=len(file_list)).start() else: print("Build time indices...") i_file = 0 allTIndex = 0 for file_name in file_list: try: nc_file = open_netcdf(file_name) except IOError: print("Warning: could not open {}".format(file_name)) continue if 'Time' not in nc_file.dimensions or xtimeName is None: local_times = ['0'] else: local_times = [] if xtimeName == 'none': # no xtime variable so just use integers converted to strings for index in range(len(nc_file.dimensions['Time'])): local_times.append(allTIndex) allTIndex += 1 else: if xtimeName not in nc_file.variables: raise ValueError("xtime variable name {} not found in " "{}".format(xtimeName, file_name)) xtime = nc_file.variables[xtimeName] if len(xtime.shape) == 2: xtime = xtime[:, :] for index in range(xtime.shape[0]): local_times.append(xtime[index, :].tostring()) else: local_times = xtime[:] if (len(local_times) == 0): local_times = ['0'] nTime = len(local_times) for time_idx in range(nTime): if local_times[time_idx] not in all_times: local_indices.append(time_idx) file_names.append(file_name) all_times.append(local_times[time_idx]) i_file = i_file + 1 nc_file.close() if use_progress_bar: time_bar.update(i_file) if use_progress_bar: time_bar.finish() return (local_indices, file_names) # }}}
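# A short sketch of calling setup_time_indices(); the glob pattern and the
# xtime variable name are assumptions for illustration only.
time_indices, time_file_names = setup_time_indices('output/hist.*.nc',
                                                   'xtime')
for t_index, f_name in zip(time_indices, time_file_names):
    print('time index {} read from {}'.format(t_index, f_name))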
def build_topo_point_and_polygon_lists(nc_file, output_32bit, lonlat): # {{{ if output_32bit: dtype = 'f4' else: dtype = 'f8' xVertex, yVertex, zVertex = \ _build_location_list_xyz(nc_file, 'Vertex', output_32bit, lonlat) nCells = len(nc_file.dimensions['nCells']) nEdges = len(nc_file.dimensions['nEdges']) maxEdges = len(nc_file.dimensions['maxEdges']) nEdgesOnCell = nc_file.variables['nEdgesOnCell'][:] verticesOnCell = nc_file.variables['verticesOnCell'][:, :] - 1 edgesOnCell = nc_file.variables['edgesOnCell'][:, :] - 1 verticesOnEdge = nc_file.variables['verticesOnEdge'][:] - 1 cellsOnEdge = nc_file.variables['cellsOnEdge'][:] - 1 # 4 points for each edge face nPoints = 4 * nEdges # 1 polygon for each edge and cell nPolygons = nEdges + nCells X = numpy.zeros(nPoints, dtype) Y = numpy.zeros(nPoints, dtype) Z = numpy.zeros(nPoints, dtype) outIndex = 0 # The points on an edge are vertex 0, 1, 1, 0 on that edge, making a # vertical rectangle if the points are offset iEdges, voe = numpy.meshgrid(numpy.arange(nEdges), [0, 1, 1, 0], indexing='ij') iVerts = verticesOnEdge[iEdges, voe].ravel() X[:] = xVertex[iVerts] Y[:] = yVertex[iVerts] Z[:] = zVertex[iVerts] vertices = (X, Y, Z) verticesOnPolygon = -1 * numpy.ones((nPolygons, maxEdges), int) verticesOnPolygon[0:nEdges, 0:4] = \ numpy.arange(4*nEdges).reshape(nEdges, 4) # Build cells if use_progress_bar: widgets = [ 'Build cell connectivity: ', Percentage(), ' ', Bar(), ' ', ETA() ] bar = ProgressBar(widgets=widgets, maxval=nCells).start() else: print("Build cell connectivity...") outIndex = nEdges for iCell in range(nCells): neoc = nEdgesOnCell[iCell] eocs = edgesOnCell[iCell, 0:neoc] vocs = verticesOnCell[iCell, 0:neoc] for index in range(neoc): iVert = vocs[index] iEdge = eocs[index] # which vertex on the edge corresponds to iVert? coes = cellsOnEdge[iEdge, :] voes = verticesOnEdge[iEdge, :] if coes[0] == iCell: if voes[0] == iVert: voe = 0 else: voe = 1 else: if voes[0] == iVert: voe = 3 else: voe = 2 verticesOnPolygon[nEdges + iCell, index] = 4 * iEdge + voe outIndex += neoc if use_progress_bar: bar.update(iCell) if use_progress_bar: bar.finish() validVerts = verticesOnPolygon >= 0 if lonlat: lonEdge = numpy.rad2deg(nc_file.variables['lonEdge'][:]) latEdge = numpy.rad2deg(nc_file.variables['latEdge'][:]) lonCell = numpy.rad2deg(nc_file.variables['lonCell'][:]) latCell = numpy.rad2deg(nc_file.variables['latCell'][:]) lonPolygon = numpy.append(lonEdge, lonCell) latPolygon = numpy.append(latEdge, latCell) vertices, verticesOnPolygon = _fix_lon_lat_vertices( vertices, verticesOnPolygon, validVerts, lonPolygon) if nc_file.on_a_sphere.strip() == 'NO' and \ nc_file.is_periodic.strip() == 'YES': if lonlat: xcoord = lonPolygon ycoord = latPolygon else: xEdge = numpy.rad2deg(nc_file.variables['xEdge'][:]) yEdge = numpy.rad2deg(nc_file.variables['yEdge'][:]) xCell = numpy.rad2deg(nc_file.variables['xCell'][:]) yCell = numpy.rad2deg(nc_file.variables['yCell'][:]) xcoord = numpy.append(xEdge, xCell) ycoord = numpy.append(yEdge, yCell) vertices, verticesOnPolygon = _fix_periodic_vertices( vertices, verticesOnPolygon, validVerts, xcoord, ycoord, nc_file.x_period, nc_file.y_period) nPoints = len(vertices[0]) # we want to know the cells corresponding to each point. The first two # points correspond to the first cell, the second two to the second cell # (if any). 
cell_to_point_map = -1 * numpy.ones((nPoints), int) boundary_mask = numpy.zeros((nPoints), bool) # first cell on edge always exists coe = cellsOnEdge[:, 0].copy() for index in range(2): voe = verticesOnPolygon[0:nEdges, index] cell_to_point_map[voe] = coe boundary_mask[voe] = False # second cell on edge may not exist coe = cellsOnEdge[:, 1].copy() mask = coe == -1 # use the first cell if the second doesn't exist coe[mask] = cellsOnEdge[:, 0][mask] for index in range(2, 4): voe = verticesOnPolygon[0:nEdges, index] cell_to_point_map[voe] = coe boundary_mask[voe] = mask # for good measure, make sure vertices on cell are also accounted for for index in range(maxEdges): iCells = numpy.arange(nCells) voc = verticesOnPolygon[nEdges:nEdges + nCells, index] mask = index < nEdgesOnCell cell_to_point_map[voc[mask]] = iCells[mask] boundary_mask[voc[mask]] = False connectivity = verticesOnPolygon[validVerts] validCount = numpy.sum(numpy.array(validVerts, int), axis=1) offsets = numpy.cumsum(validCount, dtype=int) valid_mask = numpy.ones(nCells, bool) return vertices, connectivity, offsets, valid_mask, \ cell_to_point_map, boundary_mask.ravel() # }}}
def main(): global args args = parser.parse_args() cuda = args.cuda if cuda == 'true': cuda = True else: cuda = False task_name = args.task_name epoch_size = args.epoch_size batch_size = args.batch_size result_path = os.path.join(args.result_path, args.task_name) if args.style_A: result_path = os.path.join(result_path, args.style_A) result_path = os.path.join(result_path, args.model_arch) model_path = os.path.join(args.model_path, args.task_name) if args.style_A: model_path = os.path.join(model_path, args.style_A) model_path = os.path.join(model_path, args.model_arch) data_style_A, data_style_B, test_style_A, test_style_B = get_data() if args.task_name.startswith('edges2'): test_A = read_images(test_style_A, 'A', args.image_size) test_B = read_images(test_style_B, 'B', args.image_size) elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags' or args.task_name == 'tshirts2watches' or args.task_name == 'watches2tshirts': test_A = read_images(test_style_A, 'B', args.image_size) test_B = read_images(test_style_B, 'B', args.image_size) else: test_A = read_images(test_style_A, None, args.image_size) test_B = read_images(test_style_B, None, args.image_size) test_A = Variable(torch.FloatTensor(test_A), volatile=True) test_B = Variable(torch.FloatTensor(test_B), volatile=True) if not os.path.exists(result_path): os.makedirs(result_path) if not os.path.exists(model_path): os.makedirs(model_path) generator_A = Generator() generator_B = Generator() discriminator_A = Discriminator() discriminator_B = Discriminator() if cuda: test_A = test_A.cuda() test_B = test_B.cuda() generator_A = generator_A.cuda() generator_B = generator_B.cuda() discriminator_A = discriminator_A.cuda() discriminator_B = discriminator_B.cuda() data_size = min(len(data_style_A), len(data_style_B)) n_batches = (data_size // batch_size) recon_criterion = nn.MSELoss() gan_criterion = nn.BCELoss() feat_criterion = nn.HingeEmbeddingLoss() gen_params = chain(generator_A.parameters(), generator_B.parameters()) dis_params = chain(discriminator_A.parameters(), discriminator_B.parameters()) optim_gen = optim.Adam(gen_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001) optim_dis = optim.Adam(dis_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001) iters = 0 gen_loss_total = [] dis_loss_total = [] for epoch in range(epoch_size): data_style_A, data_style_B = shuffle_data(data_style_A, data_style_B) widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(maxval=n_batches, widgets=widgets) pbar.start() for i in range(n_batches): pbar.update(i) generator_A.zero_grad() generator_B.zero_grad() discriminator_A.zero_grad() discriminator_B.zero_grad() A_path = data_style_A[i * batch_size:(i + 1) * batch_size] B_path = data_style_B[i * batch_size:(i + 1) * batch_size] if args.task_name.startswith('edges2'): A = read_images(A_path, 'A', args.image_size) B = read_images(B_path, 'B', args.image_size) elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags' or args.task_name == 'tshirts2watches' or args.task_name == 'watches2tshirts': A = read_images(A_path, 'B', args.image_size) B = read_images(B_path, 'B', args.image_size) else: A = read_images(A_path, None, args.image_size) B = read_images(B_path, None, args.image_size) A = Variable(torch.FloatTensor(A)) B = Variable(torch.FloatTensor(B)) if cuda: A = A.cuda() B = B.cuda() AB = generator_B(A) BA = generator_A(B) ABA = generator_A(AB) BAB = generator_B(BA) # Reconstruction Loss recon_loss_A = 
recon_criterion(ABA, A) recon_loss_B = recon_criterion(BAB, B) # Real/Fake GAN Loss (A) A_dis_real, A_feats_real = discriminator_A(A) A_dis_fake, A_feats_fake = discriminator_A(BA) dis_loss_A, gen_loss_A = get_gan_loss(A_dis_real, A_dis_fake, gan_criterion, cuda) fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion) # Real/Fake GAN Loss (B) B_dis_real, B_feats_real = discriminator_B(B) B_dis_fake, B_feats_fake = discriminator_B(AB) dis_loss_B, gen_loss_B = get_gan_loss(B_dis_real, B_dis_fake, gan_criterion, cuda) fm_loss_B = get_fm_loss(B_feats_real, B_feats_fake, feat_criterion) # Total Loss if iters < args.gan_curriculum: rate = args.starting_rate else: rate = args.default_rate gen_loss_A_total = (gen_loss_B * 0.1 + fm_loss_B * 0.9) * ( 1. - rate) + recon_loss_A * rate gen_loss_B_total = (gen_loss_A * 0.1 + fm_loss_A * 0.9) * ( 1. - rate) + recon_loss_B * rate if args.model_arch == 'discogan': gen_loss = gen_loss_A_total + gen_loss_B_total dis_loss = dis_loss_A + dis_loss_B elif args.model_arch == 'recongan': gen_loss = gen_loss_A_total dis_loss = dis_loss_B elif args.model_arch == 'gan': gen_loss = (gen_loss_B * 0.1 + fm_loss_B * 0.9) dis_loss = dis_loss_B if iters % args.update_interval == 0: dis_loss.backward() optim_dis.step() else: gen_loss.backward() optim_gen.step() if iters % args.log_interval == 0: print("---------------------") print("GEN Loss:", as_np(gen_loss_A.mean()), as_np(gen_loss_B.mean())) print("Feature Matching Loss:", as_np(fm_loss_A.mean()), as_np(fm_loss_B.mean())) print("RECON Loss:", as_np(recon_loss_A.mean()), as_np(recon_loss_B.mean())) print("DIS Loss:", as_np(dis_loss_A.mean()), as_np(dis_loss_B.mean())) if iters % args.image_save_interval == 0: AB = generator_B(test_A) BA = generator_A(test_B) ABA = generator_A(AB) BAB = generator_B(BA) n_testset = min(test_A.size()[0], test_B.size()[0]) subdir_path = os.path.join( result_path, str(iters / args.image_save_interval)) if os.path.exists(subdir_path): pass else: os.makedirs(subdir_path) for im_idx in range(n_testset): A_val = test_A[im_idx].cpu().data.numpy().transpose( 1, 2, 0) * 255. B_val = test_B[im_idx].cpu().data.numpy().transpose( 1, 2, 0) * 255. BA_val = BA[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255. ABA_val = ABA[im_idx].cpu().data.numpy().transpose( 1, 2, 0) * 255. AB_val = AB[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255. BAB_val = BAB[im_idx].cpu().data.numpy().transpose( 1, 2, 0) * 255. filename_prefix = os.path.join(subdir_path, str(im_idx)) scipy.misc.imsave(filename_prefix + '.A.jpg', A_val.astype(np.uint8)[:, :, ::-1]) scipy.misc.imsave(filename_prefix + '.B.jpg', B_val.astype(np.uint8)[:, :, ::-1]) scipy.misc.imsave(filename_prefix + '.BA.jpg', BA_val.astype(np.uint8)[:, :, ::-1]) scipy.misc.imsave(filename_prefix + '.AB.jpg', AB_val.astype(np.uint8)[:, :, ::-1]) scipy.misc.imsave(filename_prefix + '.ABA.jpg', ABA_val.astype(np.uint8)[:, :, ::-1]) scipy.misc.imsave(filename_prefix + '.BAB.jpg', BAB_val.astype(np.uint8)[:, :, ::-1]) if iters % args.model_save_interval == 0: torch.save( generator_A, os.path.join( model_path, 'model_gen_A-' + str(iters / args.model_save_interval))) torch.save( generator_B, os.path.join( model_path, 'model_gen_B-' + str(iters / args.model_save_interval))) torch.save( discriminator_A, os.path.join( model_path, 'model_dis_A-' + str(iters / args.model_save_interval))) torch.save( discriminator_B, os.path.join( model_path, 'model_dis_B-' + str(iters / args.model_save_interval))) iters += 1
with tf.variable_scope("model") as scope:
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer, use_prettytensor=True)

with tf.variable_scope("model", reuse=True) as scope:
    p_rep = tf.sigmoid(model.sample_prior(N_MINIBATCH))

# tf.initialize_all_variables() is deprecated; use the current initializer
init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
    pbar.start()
    for t in range(n_iter_per_epoch):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(N_MINIBATCH)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Take the average over all ELBOs during the epoch, and over the
    # minibatch of data points (images).
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / N_MINIBATCH

    # Print a lower bound to the average marginal likelihood for an image.
def generate_subtitles(source_path, *, concurrency=DEFAULT_CONCURRENCY, src_language=DEFAULT_SRC_LANGUAGE, subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, output=None, verbose=False) -> str: audio_filename, audio_rate = extract_audio(source_path) regions = find_speech_regions(audio_filename) pool = multiprocessing.Pool(concurrency) converter = FLACConverter(source_path=audio_filename) recognizer = SpeechRecognizer(language=src_language, rate=audio_rate, api_key=GOOGLE_SPEECH_API_KEY) transcripts = [] if regions: widgets = [ 'Converting speech regions to FLAC files: ', Percentage(), ' ', Bar(), ' ', ETA() ] p_bar = OptionalProgressBar(verbose=verbose, widgets=widgets, maxval=len(regions)) try: p_bar.start() extracted_regions = [] for i, extracted_region in enumerate(pool.imap(converter, regions)): extracted_regions.append(extracted_region) p_bar.update(i) p_bar.finish() widgets = [ 'Performing speech recognition: ', Percentage(), ' ', Bar(), ' ', ETA() ] p_bar = OptionalProgressBar(verbose=verbose, widgets=widgets, maxval=len(regions)).start() for i, transcript in enumerate( pool.imap(recognizer, extracted_regions)): transcripts.append(transcript) p_bar.update(i) p_bar.finish() except KeyboardInterrupt: p_bar.finish() pool.terminate() pool.join() print('Cancelling transcription') raise timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t] formatter: BaseFormatter = FORMATTERS.get(subtitle_file_format)() formatted_subtitles = formatter.generate(timed_subtitles) with smart_open(output) as f: f.write(formatted_subtitles) os.remove(audio_filename) if output: print('Subtitles file created at {subtitle_file_path}'.format( subtitle_file_path=output)) return formatted_subtitles
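# A hedged usage sketch for the keyword-only variant above; the file names
# and settings are illustrative assumptions.
subtitles = generate_subtitles('lecture.mp4',
                               concurrency=4,
                               src_language='en',
                               subtitle_file_format='srt',
                               output='lecture.srt',
                               verbose=True)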
def process(self, filename, cols): Rat.frame_loc = 0 (fname_name, ext) = os.path.splitext(filename) left_right = fname_name[-1] self.read_data(filename, cols=cols) # data_p1 = self.data[:, 1] data_p5 = self.df.loc[:, 'nonzero_p5'] total_frames = len(data_p5) # each sample represents 5 frames (5 sec) moving_win = self.windowed_view(data_p5, 10, 5) win_mean = np.mean(moving_win, axis=1) label = (win_mean > Rat.thMin) & (win_mean < Rat.thMax) label[:Rat.jump_rows] = False print('win_mean.shape %d' % win_mean.shape) print('nonzero of label %d' % np.count_nonzero(label)) label_win = self.windowed_view(label, 5, 4) sum_label = np.sum(label_win, axis=1) labelLick = (sum_label == 5) # labelLick = sum_label labelLick1 = labelLick.copy() for i in range(labelLick.size): if labelLick[i] == True: labelLick1[i:i + 6] = True for i in range(6, labelLick1.size): if labelLick1[i] == False: labelLick1[i - 6:i] = False # labelLick_file_name = '{}/_labelLick_{}.csv'.format(str(self.video_dir), left_right) # labelLick.tofile(labelLick_file_name, sep='\n') # # labelLick_file_name = '{}/_labelLick1_{}.csv'.format(str(self.video_dir), left_right) # labelLick1.tofile(labelLick_file_name, sep='\n') # label_file_name = '{}/_label_{}.csv'.format(str(self.video_dir), left_right) # label.tofile(label_file_name, sep='\n') print('label_win.shape ', label_win.shape) print('labelLick.shape ', labelLick.shape) print('sum of labelLick %d, size %d' % (np.sum(labelLick), labelLick.size)) #plt.plot(label) #plt.show() # (head_path, vname) = os.path.split(str(self.video_dir)) video_file = '{}/{}.avi.mkv'.format(str(self.video_dir), self.out_dir.name) # print ('Rat::video_file ', video_file) cap = cv2.VideoCapture(video_file) bOpenVideo = cap.isOpened() if bOpenVideo == False: print('Open Video failed') return Rat.fps = cap.get(cv2.CAP_PROP_FPS) Rat.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) Rat.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps_out = Rat.fps // 2 freqStr = '{0:d}L'.format(int(1000 / fps_out)) print('fps = %d, w %d, h %d, total_frames %d, freq_out %s' % (Rat.fps, Rat.width, Rat.height, total_frames, freqStr)) print('min duration %d, diff thresh [%.4f %.4f]' % (Rat.th_min_duration, Rat.thMin, Rat.thMax)) bVideoWR = False extract_clips = 0 widgets = [Percentage(), Bar()] pbar = ProgressBar(widgets=widgets, maxval=labelLick1.size).start() for i in range(5, labelLick1.size): frameCounter = i * 5 if labelLick1[i] == True: #print(win_mean[i]) if bVideoWR == False: start_frame = frameCounter end_frame = frameCounter bVideoWR = True else: if frameCounter < total_frames: end_frame = frameCounter else: end_frame = total_frames else: if bVideoWR == True: bVideoWR = False if end_frame - start_frame > Rat.th_min_duration: self.write_features(start_frame, end_frame, fps_out, left_right) self.write_video(cap, start_frame, end_frame, fps_out, left_right) extract_clips += 1 pbar.update(i) pbar.finish() print('extract_clips: ', extract_clips)
def generate_subtitles( source_path, output=None, concurrency=DEFAULT_CONCURRENCY, src_language=DEFAULT_SRC_LANGUAGE, dst_language=DEFAULT_DST_LANGUAGE, subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, api_key=None, ): audio_filename, audio_rate = extract_audio(source_path) regions = find_speech_regions(audio_filename) multiprocessing.freeze_support() pool = multiprocessing.Pool(concurrency) converter = FLACConverter(source_path=audio_filename) recognizer = SpeechRecognizer(language=src_language, rate=audio_rate, api_key=GOOGLE_SPEECH_API_KEY) transcripts = [] if regions: try: widgets = [ "Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() extracted_regions = [] for i, extracted_region in enumerate(pool.imap(converter, regions)): extracted_regions.append(extracted_region) pbar.update(i) pbar.finish() widgets = [ "Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() for i, transcript in enumerate( pool.imap(recognizer, extracted_regions)): transcripts.append(transcript) pbar.update(i) pbar.finish() if not is_same_language(src_language, dst_language): if api_key: google_translate_api_key = api_key translator = Translator(dst_language, google_translate_api_key, dst=dst_language, src=src_language) prompt = "Translating from {0} to {1}: ".format( src_language, dst_language) widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() translated_transcripts = [] for i, transcript in enumerate( pool.imap(translator, transcripts)): translated_transcripts.append(transcript) pbar.update(i) pbar.finish() transcripts = translated_transcripts else: print( "Error: Subtitle translation requires specified Google Translate API key. " "See --help for further information.") return 1 except KeyboardInterrupt: pbar.finish() pool.terminate() pool.join() print("Cancelling transcription") raise timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t] formatter = FORMATTERS.get(subtitle_file_format) formatted_subtitles = formatter(timed_subtitles) dest = output if not dest: base, ext = os.path.splitext(source_path) dest = "{base}.{format}".format(base=base, format=subtitle_file_format) with open(dest, 'wb') as f: f.write(formatted_subtitles.encode("utf-8")) os.remove(audio_filename) return dest
NUM_HEROES = 108 NUM_FEATURES = NUM_HEROES * 2 # Our training label vector, Y, is a bit vector indicating # whether radiant won (1) or lost (-1) NUM_MATCHES = matches.count() # Initialize training matrix X = np.zeros((NUM_MATCHES, NUM_FEATURES), dtype=np.int8) # Initialize training label vector Y = np.zeros(NUM_MATCHES, dtype=np.int8) widgets = [FormatLabel('Processed: %(value)d/%(max)d matches. '), ETA(), ' ', Percentage(), ' ', Bar()] pbar = ProgressBar(widgets = widgets, maxval = NUM_MATCHES).start() for i, record in enumerate(matches.find()): Y[i] = 1 if record['radiant_win'] else -1 players = record['players'] for player in players: hero_id = player['hero_id'] - 1 # If the left-most bit of player_slot is set, # this player is on dire, so push the index accordingly player_slot = player['player_slot'] if player_slot >= 128: hero_id += NUM_HEROES X[i, hero_id] = 1
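# A small helper that restates the indexing rule above for clarity; the
# function name is ours, not the original author's. A radiant pick of
# hero_id 5 lands in column 4; the same hero on dire lands in column
# 4 + NUM_HEROES = 112.
def feature_index(hero_id, player_slot, num_heroes=NUM_HEROES):
    index = hero_id - 1
    if player_slot >= 128:  # left-most bit set means the player is on dire
        index += num_heroes
    return index

assert feature_index(5, 0) == 4
assert feature_index(5, 128) == 112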
def createprogress(count): """Return progress Bar""" widgets = [Percentage(), ' ', Bar(), ' ', AdaptiveETA()] pbar = ProgressBar(widgets=widgets, maxval=count) pbar.start() return pbar
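# Sketch of driving the bar returned by createprogress(); note that the
# helper already calls start(), so only update()/finish() are needed here.
# `records` is a hypothetical iterable standing in for real work items.
records = range(500)
pbar = createprogress(len(records))
for i, record in enumerate(records):
    # process record here
    pbar.update(i + 1)
pbar.finish()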
def modify_one_by_one_function(self, name): """Apply a function (local search, mutation) to all chromosomes.""" start = time.time() if self.progress_bar: print("{}:".format(name)) if name == "Local search": current_function = self.memetic_function elif name == "Mutation": current_function = self.mutation_function name = name[:8] + ' ' + self.config["mutation_type"] else: raise NameError("Bad type of function.") if self.iteration > 1: if name in self.logs[-2].keys(): if self.logs[-2][name]["step_time"] < 4: self.progress_bar = False else: self.progress_bar = True if self.fitness_function.name in ["fully connected", "convnet"]: self.progress_bar = False if self.pool: p = Pool(self.pool_size) manager = Manager() lock = manager.Lock() counter = manager.Value('i', 0) if self.progress_bar: pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], term_width=60, maxval=len(self.population)).start() else: pbar = None def pool_function(inside_lock, inside_counter, inside_member): inside_lock.acquire() inside_counter.value += 1 inside_lock.release() inside_member.apply_on_chromosome(current_function, gpu=inside_counter.value % 4) inside_lock.acquire() if pbar: pbar.update(inside_counter.value) inside_lock.release() return inside_member func = partial(pool_function, lock, counter) first = 1 if self.elitism and name == "Mutation" else 0 members = p.map(func, self.population[first:]) if self.elitism and name == "Mutation": members.append(self.population[0]) self.population.current_population = members p.terminate() else: if self.progress_bar: pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], term_width=60, maxval=len(self.population)).start() ignor_first = self.elitism and name == "Mutation" for i, member in enumerate(self.population): if self.progress_bar: pbar.update(i + 1) if not ignor_first: member.apply_on_chromosome(current_function) ignor_first = False if self.progress_bar: pbar.finish() step_time = time.time() - start if step_time < 120: print('{0} time: {1:.2f}s\n'.format(name, step_time)) else: print('{0} time: {1:.2f}min\n'.format(name, step_time // 60)) return step_time, name
def example0(): pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=300).start() for i in range(300): time.sleep(0.01) pbar.update(i + 1) pbar.finish()
sess.run(tf.global_variables_initializer())
add_num = 0
if os.path.exists('../logs_lm/checkpoint'):
    print('loading language model...')
    latest = tf.train.latest_checkpoint('../logs_lm')
    add_num = int(latest.split('_')[-1])
    saver.restore(sess, latest)
writer = tf.summary.FileWriter('../logs_lm/tensorboard',
                               tf.get_default_graph())
for k in range(epochs):
    total_loss = 0
    batch = train_data.get_lm_batch()
    widgets = [
        'this is the ' + str(k + 1) + 'th epoch of training: ',
        Percentage(), ' ', Bar(), ' ', ETA()
    ]
    pbar = ProgressBar(widgets=widgets, maxval=batch_num).start()
    for i in range(batch_num):
        input_batch, label_batch = next(batch)
        feed = {lm.x: input_batch, lm.y: label_batch}
        cost, _ = sess.run([lm.mean_loss, lm.train_op], feed_dict=feed)
        total_loss += cost
        if (k * batch_num + i) % 10 == 0:
            rs = sess.run(merged, feed_dict=feed)
            writer.add_summary(rs, k * batch_num + i)
        pbar.update(i)
    pbar.finish()
    print('epochs', k + 1, ': average loss = ', total_loss / batch_num)
saver.save(sess, '../logs_lm/model_%d' % (epochs + add_num))
def main(): if TEST: sys.stdout.write('Run in TEST mode! \n') args = sys.argv[1:] if len(args) < 2: return usage() infile = args[0] outfile = args[1] check_outfile_path(outfile) fin = ROOT.TFile(infile) t = fin.Get('tree') entries = t.GetEntriesFast() pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=entries).start() time_start = time() # fout = ROOT.TFile(outfile, "RECREATE") # t_out = ROOT.TTree('signal', 'signal') # mystruct = ROOT.MyTreeStruct() ## mystruct2 = ROOT.MyTreeStruct2() # t_out.Branch('vtx_mrecpipi', mystruct, 'vtx_mrecpipi/D') # t_out.Branch('indexmc', mystruct2, 'indexmc/D') # t_out.Branch('pdgid', mystruct, 'm_pdgid[100]/I') # t_out.Branch('trkidx', mystruct, 'm_trkidx[100]/I') # t_out.Branch('motherpid', mystruct, 'm_motherpid[100]/I') # t_out.Branch('motheridx', mystruct, 'm_motheridx[100]/I') for jentry in xrange(entries): pbar.update(jentry + 1) # get the next tree in the chain and verify ientry = t.LoadTree(jentry) if ientry < 0: break # copy next entry into memory and verify if TEST and ientry > 10000: break nb = t.GetEntry(jentry) if nb <= 0: continue if NonPiPiJpsi: # Non-PiPiJpsi if not (check_pipiJpsi(t)): fill_histograms_all_combination(t) else: # Normal fill_histograms_all_combination(t) fout = ROOT.TFile(outfile, "RECREATE") # t_out.Write() write_histograms() fout.Close() pbar.finish() dur = duration(time() - time_start) sys.stdout.write(' \nDone in %s. \n' % dur)
def check_facebookcontact(elementtype, xml_root): global LIMIT_COUNTER, ARGS list_suffix2kill = [ 'about', 'community', 'info', 'posts', 'reviews', 'services', 'timeline', ] list_subdomains2kill = [ 'm', 'web', 'b-m', 'da-dk', 'de-de', 'el-gr', 'en-gb', 'es-es', 'fr-fr', 'he-il', 'hr-hr', 'is-is', 'it-it', 'nl-nl', 'pl-pl', 'si-si' ] bar_max = len(xml_root.findall(elementtype)) if int(ARGS.limit) > 0 and int( ARGS.limit) < len(xml_root.findall('relation')) + len( xml_root.findall('way')) + len(xml_root.findall('node')): bar_max = int(ARGS.limit) widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', AdaptiveETA()] p_bar = progressbar.ProgressBar(widgets=widgets, maxval=bar_max) p_bar.start() for element in xml_root.findall(elementtype): element_changed = False # save file and exit if limit is reached if int(ARGS.limit) > 0 and LIMIT_COUNTER >= int(ARGS.limit): with open(ARGS.export, 'w') as f: f.write(ET.tostring(xml_root, encoding='utf8').decode('utf8')) break # iterate throug elements for tag in element.findall('tag'): initial_url = '' # if we have found a website-tag with facebook in the url we move it to the contact:facebook-tag if tag.attrib['k'] == 'website' and tag.attrib['v'].find( 'facebook') >= 0: tag.attrib['k'] = 'contact:facebook' element_changed = True if tag.attrib['k'] == 'contact:facebook': # save initial url-value to variable: initial_url initial_url = tag.attrib['v'] # load initial url into variable for substitute-url sub_url = initial_url # apply-subdomain-replacement for subdomain in list_subdomains2kill: sub_url = sub_url.replace(subdomain + '.facebook.com', 'www.facebook.com') # fix: urls starting with // sub_url = re.sub(r"^\/\/(www\.|)facebook(\.com|\.de|\.pl)\/", "https://www.facebook.com/", sub_url, 0) # fix: urls starting with www.facebook* or facebook* sub_url = re.sub(r"^(www\.|)facebook(\.com|\.de|\.pl)\/", "https://www.facebook.com/", sub_url, 0) # fix: replace http(s)://facebook* by https://www.facebook.com sub_url = re.sub(r"http(|s)\:\/\/facebook(\.com|\.de|\.pl)\/", "https://www.facebook.com/", sub_url, 0) # fix: cut the category-part out of the url sub_url = re.sub( r"^https\:\/\/(www\.|)facebook\.com\/pages\/category\/[0-9a-zA-Z-]+\/", "https://www.facebook.com/pages/", sub_url, 0) # test: if url is a redirection find its final target and use this as substitute-url if requests.get(sub_url).status_code != 200: test_url = re.sub( r"^https\:\/\/www\.facebook\.com\/pages\/", "https://www.facebook.com/", sub_url, 0) if requests.get(test_url).status_code == 200: sub_url = test_url # if the url does not contain photo/media-parts we do not need any get parameters if sub_url.find('profile.php') == -1 and sub_url.find( '/media/set/') == -1 and sub_url.find('/photo/') == -1: sub_url = re.findall(r"^([^?]+)", sub_url)[0] # if we got a new url so far, we tag this element as changed if str(initial_url) != str(sub_url): # print(initial_url + ' > ' + sub_url) tag.attrib['v'] = sub_url element_changed = True # sometimes we get redirected via the login-page. 
in this case we will have to html-decrypt the next-param and replay the get-param-removal r = requests.get(tag.attrib['v'], headers=HEADERS) if r.url != tag.attrib['v']: login_url = re.findall( r"https:\/\/www\.facebook\.com\/login\/\?next\=(.*)", r.url) if login_url: decoded_url = urllib.parse.unquote(login_url[0]) if decoded_url.find( 'profile.php') == -1 and decoded_url.find( '/media/set/') == -1 and decoded_url.find( '/photo/') == -1: decoded_url = re.findall(r"^([^?]+)", decoded_url)[0] if initial_url != decoded_url: tag.attrib['v'] = decoded_url else: if initial_url != r.url: # if we can reach that url we take it as replacement otherwise remove it if r.status_code == 200: tag.attrib['v'] = r.url if r.status_code == 404: tag.attrib['v'] = '' # print(r.url + ' >> DELETED because of 404') # kill all suffixes from url for suffix in list_suffix2kill: if tag.attrib['v'].endswith('/' + suffix): tag.attrib['v'] = tag.attrib['v'][:-(len(suffix) + 1)] if tag.attrib['v'].endswith('/' + suffix + '/'): tag.attrib['v'] = tag.attrib['v'][:-(len(suffix) + 2)] # replay login-redirection fix if tag.attrib['v'] != initial_url: # print(initial_url + ' >>> ' + tag.attrib['v']) if tag.attrib['v'].find( 'profile.php') == -1 and tag.attrib['v'].find( '/media/set/') == -1 and tag.attrib['v'].find( '/photo/') == -1 and re.findall( r"^([^?]+)", tag.attrib['v']): tag.attrib['v'] = re.findall(r"^([^?]+)", tag.attrib['v'])[0] if tag.attrib['v'] != '' and requests.get( tag.attrib['v'], headers=HEADERS).status_code == 404: tag.attrib['v'] = '' # print(initial_url + ' >> ' + tag.attrib['v']) sys.stdout.write('[' + str(LIMIT_COUNTER) + '/' + str(bar_max) + '] ' + initial_url + "\n >>> " + tag.attrib['v'] + " \n") p_bar.update(LIMIT_COUNTER) if element_changed: if LIMIT_COUNTER + 1 > int(ARGS.offset) or (LIMIT_COUNTER == 0 and int(ARGS.offset) == 0): # set modify-tag if element is in range element.attrib['action'] = 'modify' empty_tags = element.findall("tag[@v='']") if empty_tags: for empty_tag in empty_tags: element.remove(empty_tag) RESULT_ROOT.append(element) LIMIT_COUNTER += 1 if int(ARGS.limit) > 0 and LIMIT_COUNTER >= int(ARGS.limit): p_bar.finish()
def train(self): config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.per_process_gpu_memory_fraction = 0.45 with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): counter = self.build_model(sess) saver = tf.train.Saver(tf.all_variables(), keep_checkpoint_every_n_hours=2) # summary_op = tf.merge_all_summaries() summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) keys = ["d_loss", "g_loss"] log_vars = [] log_keys = [] for k, v in self.log_vars: if k in keys: log_vars.append(v) log_keys.append(k) # print(k, v) generator_lr = cfg.TRAIN.GENERATOR_LR discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR num_embedding = cfg.TRAIN.NUM_EMBEDDING lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH number_example = self.dataset.train._num_examples updates_per_epoch = int(number_example / self.batch_size) epoch_start = int(counter / updates_per_epoch) for epoch in range(epoch_start, self.max_epoch): widgets = [ "epoch #%d|" % epoch, Percentage(), Bar(), ETA() ] pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets) pbar.start() if epoch % lr_decay_step == 0 and epoch != 0: generator_lr *= 0.5 discriminator_lr *= 0.5 all_log_vals = [] for i in range(updates_per_epoch): pbar.update(i) # training d images, wrong_images, embeddings, _, _ =\ self.dataset.train.next_batch(self.batch_size, num_embedding) feed_dict = { self.images: images, self.wrong_images: wrong_images, self.embeddings: embeddings, self.generator_lr: generator_lr, self.discriminator_lr: discriminator_lr } # train d feed_out = [ self.discriminator_trainer, self.d_sum, self.hist_sum, log_vars, self.embeddings, self.fake_embeddings ] for j in range(self.opt.dis_steps): _, d_sum, hist_sum, log_vals, real_emb, fake_emb = sess.run( feed_out, feed_dict) summary_writer.add_summary(d_sum, counter) summary_writer.add_summary(hist_sum, counter) all_log_vals.append(log_vals) # train g feed_out = [self.generator_trainer, self.g_sum] for k in range(self.opt.gen_steps): _, g_sum = sess.run(feed_out, feed_dict) summary_writer.add_summary(g_sum, counter) # save checkpoint counter += 1 if counter % self.snapshot_interval == 0: snapshot_path = "%s/%s_%s.ckpt" %\ (self.checkpoint_dir, self.exp_name, str(counter)) fn = saver.save(sess, snapshot_path) EMB = np.concatenate((real_emb, fake_emb)) y = np.zeros(EMB.shape[0]) y[:real_emb.shape[0]] = 1 print("Model saved in file: %s" % fn) img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY) summary_writer.add_summary(img_sum, counter) avg_log_vals = np.mean(np.array(all_log_vals), axis=0) dic_logs = {} for k, v in zip(log_keys, avg_log_vals): dic_logs[k] = v # print(k, v) log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k])) for k in dic_logs) print("Epoch %d | " % (epoch) + log_line) sys.stdout.flush() if np.any(np.isnan(avg_log_vals)): raise ValueError("NaN detected!")
def europe_data(network_objects, network_path, data_path, nuts_path, europe_data_path): if network_path is not None: out_path = str(network_path) + '/freight_data' network_files = str(network_path) + '/network_files' nuts_path = str(data_path) + '/nuts_borders' europe_data_path = str(data_path) + '/GQGV_2014_Mikrodaten.csv' if os.path.isfile(str(out_path) + '/nuts_europe_dict.pkl') is False or out_path is None: print(datetime.datetime.now(), 'Europe data manipulating begins ...') if network_path is not None: # Find best graph: if os.path.isfile(str(network_files) + "/eu_connected_graph_bytime.gpickle"): graph_path = str(network_files) + '/eu_connected_graph_bytime.gpickle' print(datetime.datetime.now(), 'Graph loaded: eu_connected_graph_bytime') elif os.path.isfile(str(network_path) + "/bc_official/eu_network_graph_with_official_bc.gpickle"): graph_path = str(network_path) + '/bc_official/eu_network_graph_with_official_bc.gpickle' print(datetime.datetime.now(), 'Graph loaded: eu_network_graph_with_official_bc') else: graph_path = str(network_files) + '/eu_network_largest_graph_bytime.gpickle' print(datetime.datetime.now(), 'Graph loaded: eu_network_largest_graph_bytime') # Check if out_path exists or create it if not os.path.exists(str(out_path)): os.makedirs(str(out_path)) print(datetime.datetime.now(), 'Directory created.') else: print(datetime.datetime.now(), 'Directory exists.') # Create dictionary with nuts id and coordinates of centroid and closest node id to it # IMPORT G graph with largest network G = nx.read_gpickle(str(graph_path)) # # IMPORT nodes_europe # file = open(str(network_files) + "/europe_nodes_dict2056.pkl", 'rb') file = open(str(network_files) + "/europe_nodes_dict4326.pkl", 'rb') nodes_europe_2056 = pickle.load(file) file.close() else: out_path = None G = network_objects[0] nodes_europe_2056 = network_objects[2] print(datetime.datetime.now(), 'Graph has: ' + str( len([len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)])) + ' island with ' + str(G.number_of_nodes()) + '/' + str(G.number_of_edges()) + ' (Nnodes/Nedges)') print(datetime.datetime.now(), 'Nnodes in nodes_europe_2056: ' + str(len(nodes_europe_2056))) if os.path.isfile(str(out_path) + '/nuts_europe_dict.pkl') is False or out_path is None: # Merge data from NUTS into one gdf: unique_nuts_gdf = nuts_merging(nuts_path) # Build tree for KDTree nearest neighbours search, in G only start and end nodes are included # OPTION 3: input only nodes in largest network in G G_nodes = list(G.nodes) G_nodes.sort(key=float) G_lonlat = [] i = 0 node_sel = G_nodes[i] for id in list(nodes_europe_2056): if node_sel == int(id): lonlat = nodes_europe_2056[id] G_lonlat.append(lonlat) if i < len(G_nodes) - 1: i += 1 node_sel = G_nodes[i] print(datetime.datetime.now(), 'KDTree has: ' + str(len(G_lonlat)) + ' nodes.') print('------------------------------------------------------------------------') nuts_europe = {} tree = spatial.KDTree(G_lonlat) pbar = ProgressBar(widgets=[Bar('>', '[', ']'), ' ', Percentage(), ' ', ETA()], maxval=len(unique_nuts_gdf)) for i in range(len(unique_nuts_gdf)): # for i in pbar(range(len(unique_nuts_gdf))): nuts_id = unique_nuts_gdf.iloc[i]['NUTS_ID'] polygon = unique_nuts_gdf.iloc[i]['geometry'] centroid = (polygon.centroid.x, polygon.centroid.y) # this gives the closest nodes id from the nuts centroid coordinates nn = tree.query(centroid) coord = G_lonlat[nn[1]] closest_node_id = int(list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))]) # 
stores as dictionary nuts_europe[nuts_id] = [centroid, closest_node_id] # print(datetime.datetime.now(), i, end="\r") # EXPORT nuts_centroid_dict TO FILE if out_path is not None: with open(str(out_path) + '/nuts_europe_dict' + '.pkl', 'wb') as f: pickle.dump(nuts_europe, f, pickle.HIGHEST_PROTOCOL) print(datetime.datetime.now(), len(nuts_europe)) print('------------------------------------------------------------------------') elif os.path.isfile(str(out_path) + "/od_europesum_df.csv") is False: # CHECKPOINT: load nuts dictionary file = open(str(out_path) + '/nuts_europe_dict.pkl', 'rb') nuts_europe = pickle.load(file) file.close() print(datetime.datetime.now(), 'Nnuts in nuts_europe: ' + str(len(nuts_europe))) print('------------------------------------------------------------------------') if os.path.isfile(str(out_path) + '/od_europesum_df.csv') is False or out_path is None: # load europe OD matrix ('GQGV_2014_Mikrodaten.csv' file) od_europe_df = pd.read_csv(europe_data_path, sep=",") # select relevant columns from dataframe od_europesum_df = od_europe_df[ ['OID', 'ORIGIN', 'DESTINATION', 'BORDER_CROSSING_IN', 'BORDER_CROSSING_OUT', 'KM_PERFORMANCE', 'WEIGHTING_FACTOR', 'DIVISOR']] # add columns (o_node_id, d_node_id) to dataframe with closest node depending origin and destination NUT # also creating list of NUTS ('missing_nuts') which are not defined in the dictionary nuts_europe missing_nuts = [] def od_func_eu(origin, destination, rowname): try: o_node_id = nuts_europe[origin][1] except: o_node_id = None if origin not in missing_nuts: missing_nuts.append(origin) try: d_node_id = nuts_europe[destination][1] except: d_node_id = None if destination not in missing_nuts: missing_nuts.append(destination) # print(datetime.datetime.now(), rowname, end="\r") return pd.Series([o_node_id, d_node_id]) od_europesum_df[['o_node_id', 'd_node_id']] = od_europesum_df.apply( lambda row: od_func_eu(row['ORIGIN'], row['DESTINATION'], row.name), axis=1) df = pd.DataFrame(data={"missing_nuts": missing_nuts}) od_europesum_df = pd.DataFrame.dropna( od_europesum_df) # in case there are missing nuts not defined in the dictionary if out_path is not None: df.to_csv(str(out_path) + "/missing_nuts.csv", sep=',', index=False) od_europesum_df.to_csv(str(out_path) + "/od_europesum_df.csv", sep=",", index=None) print(datetime.datetime.now(), 'Process of manipulating europa data finished') print('------------------------------------------------------------------------') else: print(datetime.datetime.now(), 'Manipulated data already exists.') print('------------------------------------------------------------------------') # Last filter for 2_routing if os.path.isfile(str(out_path) + "/od_incorrect_DABC.csv") is True: od_incorrect_DABC = pd.read_csv(str(out_path) + "/od_incorrect_DABC.csv", encoding='latin1') print('Nroutes in od_incorrect_DABC: '+ str(len(od_incorrect_DABC))) od_incorrect_DABC.head() # DROP ROWS FROM incorrect dataframe from 1_ROUTING droprows = [] print(len(od_europesum_df)) for i in range(len(od_europesum_df)): print(i, end="\r") oid = od_europesum_df.iloc[i]['OID'] if oid in list(od_incorrect_DABC['OID']): droprows.append(i) od_europesum_df=od_europesum_df.drop(od_europesum_df.index[droprows]) od_europesum_df.to_csv(str(out_path) + "/od_europesum_df.csv", sep = ",", index = None, encoding='latin1') print(len(od_europesum_df)) print('------------------------------------------------------------------------') # CH DATA # ------------------------------------------------------------------------------- # 
# CH DATA
# ------------------------------------------------------------------------------- #
# Similar process to the europe data:
# CREATE DICTIONARY WITH CENTROID COORDINATES AND CLOSEST NODE IN SELECTED NETWORK OF EVERY PLZ
# plz_ch = {}
# tree = spatial.KDTree(G_lonlat)
#
# def closest_node(centroid, plz_id):
#     nn = tree.query(centroid, k=10000)
#     in_ch = ch_border.contains(Point(centroid))
#     # this condition ensures that the closest network node found is on the same side of the border as the centroid
#     if in_ch == True:
#         for point in nn[1]:
#             coord = G_lonlat[point]
#             node_in_ch = ch_border.contains(Point(coord[0], coord[1]))
#             if node_in_ch == True:
#                 closest_node_id = int(
#                     list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))])
#                 break
#     elif in_ch == False:
#         for point in nn[1]:
#             coord = G_lonlat[point]
#             node_in_ch = ch_border.contains(Point(coord[0], coord[1]))
#             if node_in_ch == False:
#                 closest_node_id = int(
#                     list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))])
#                 break
#     plz_ch[str(plz_id)] = [centroid, closest_node_id,
#                            in_ch]  # stored under a string key because PLZ are strings in the freight data, so they match later
#
# for i in range(0, len(plz_gdf)):
#     plz_id = plz_gdf.iloc[i]['PLZ']
#     if plz_id not in list(plz_ch):
#         poly_list = []
#         # this searches for the different polygons with the same PLZ code, to find the centroid of the union of all of them
#         for j in range(i, len(plz_gdf)):
#             if plz_gdf.iloc[j]['PLZ'] == plz_id:
#                 polygon = plz_gdf.iloc[j]['geometry']
#                 poly_list.append(polygon)
#
#         boundary = gpd.GeoSeries(cascaded_union(poly_list))
#         centroid = boundary[0].centroid.coords[0]
#         funct_sol = closest_node(centroid, plz_id)
#         print(i, end="\r")
#
# # EXPORT plz_ch dictionary TO FILE
# with open(str(out_path) + '/plz_ch_dict' + '.pkl', 'wb') as f:
#     pickle.dump(plz_ch, f, pickle.HIGHEST_PROTOCOL)
#
# print(len(plz_ch))
#
# plz_ch
#
# # JOURNEYCH data
# od_ch_df = pd.read_csv(str(in_path) + '/freight_data/freight/gte/GTE_2017/Donnees/journeych.csv', sep=";",
#                        low_memory=False)
# # select relevant columns from the od matrix
# od_chsum_df = od_ch_df[['ernr', 'fromPlz', 'toPlz', 'fromNuts', 'toNuts']]
# for i in range(0, len(od_chsum_df)):
#     od_chsum_df.set_value(i, 'fromPlz', od_chsum_df.iloc[i]['fromPlz'].rstrip())
#     od_chsum_df.set_value(i, 'toPlz', od_chsum_df.iloc[i]['toPlz'].rstrip())
#     # od_chsum_df.at[i, 'fromPlz'] = od_chsum_df.iloc[i]['fromPlz'].rstrip()  # in case set_value gets removed from pandas
#     # od_chsum_df.at[i, 'toPlz'] = od_chsum_df.iloc[i]['toPlz'].rstrip()  # in case set_value gets removed from pandas
#     # od_chsum_df.iloc[i]['fromPlz'] = od_chsum_df.iloc[i]['fromPlz'].rstrip()  # TOO SLOW
#     # od_chsum_df.iloc[i]['toPlz'] = od_chsum_df.iloc[i]['toPlz'].rstrip()  # TOO SLOW
#     print(i, end="\r")
#
# # THIS ADDS THE GROSSING FACTOR TO THE MANIPULATED DATA from switzerland
# od_chw_df = pd.read_csv(str(in_path) + '/freight_data/freight/gte/GTE_2017/Donnees/week.csv', sep=";", low_memory=False)
# od_chwsum_df = od_chw_df[['ernr', 'grossingFactor']]
# od_chsum_df = od_chsum_df.merge(od_chwsum_df)
# od_chsum_df.head()
#
# # add columns (o_node_id, d_node_id) to the dataframe with the closest node for the origin and destination PLZ
# # also create a list of PLZs ('missing_plz') which are not defined in the dictionary plz_ch
# missing_plz = []
#
# def od_func_ch(origin, destination, rowname):
#     try:
#         o_node_id = int(plz_ch[origin][1])
#     except (KeyError, TypeError):
#         o_node_id = None
#         if origin not in missing_plz:
#             missing_plz.append(origin)
#     try:
#         d_node_id = int(plz_ch[destination][1])
#     except (KeyError, TypeError):
#         d_node_id = None
#         if destination not in missing_plz:
#             missing_plz.append(destination)
#     # print(rowname, end="\r")
#     return pd.Series([o_node_id, d_node_id])
#
# od_chsum_df[['o_node_id', 'd_node_id']] = od_chsum_df.apply(
#     lambda row: od_func_ch(row['fromPlz'], row['toPlz'], row.name), axis=1)
#
# df = pd.DataFrame(data={"missing_plz": missing_plz})
# df.to_csv("./missing_plz.csv", sep=',', index=False)
#
# od_chsum_df = od_chsum_df.dropna()  # in case there are PLZs not defined in the dictionary
# od_chsum_df.to_csv(str(out_path) + "/od_chsum_df.csv", sep=",", index=False)
#
# print(len(od_chsum_df))
# print(len(missing_plz))
# od_chsum_df.head()
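# The commented-out block above relies on pandas' set_value (removed in pandas 1.0)
# and shapely's cascaded_union (deprecated in favor of unary_union). A sketch of the
# same two steps with current APIs, assuming od_chsum_df and poly_list as above:
from shapely.ops import unary_union

od_chsum_df['fromPlz'] = od_chsum_df['fromPlz'].str.rstrip()  # vectorized, replaces the per-row set_value loop
od_chsum_df['toPlz'] = od_chsum_df['toPlz'].str.rstrip()
merged_plz_polygon = unary_union(poly_list)  # same result as cascaded_union
centroid = merged_plz_polygon.centroid.coords[0]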
def play_random_games(self, num_games, save_to_disk=False, save_every=100, save_path=''):
    '''Simulates a specified number of games, where each game involves two agents
    randomly selected from the population. To periodically save the population to
    disk, pass save_to_disk=True; save_every controls how often to save and
    save_path where. The default is the current working directory.'''

    # Initialize the progress bar
    progress = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=num_games).start()

    if save_to_disk:
        # Make sure the path does not already exist
        while path.exists(save_path):
            try:
                old_file_num = int(save_path[save_path.find('_') + 1:])
                new_file_num = old_file_num + 1
                save_path = save_path[0:save_path.find('_')] + '_' + str(new_file_num)
            except ValueError:
                save_path = save_path + "_1"
        self.folder = save_path
        mkdir(save_path)
        self.population.save(path=save_path, suffix='0')

    # Pre-sample the stimuli: a list of length-2 lists, where each element is a
    # 2-tuple representing a chip
    stimuli = self.color_grid.sample(num_games, 2)

    # Run the games
    for i in range(num_games):
        # Randomly select the agents for a single game
        agent_keys = self.population.get_random_pair()

        # Save the game history
        # self.game_history.append([agent_keys, stimuli[i]])  # stimuli[i] = [chip1, chip2] where chip1,2 are 2-tuples

        # Play the game
        self.play_game(agent_keys, stimuli[i])

        # Update the status bar
        progress.update(i + 1)

        # Save to disk
        if save_to_disk and (i % save_every) == 0:
            self.population.save(path=save_path, suffix=str(i))

    # Save the last iteration if it was not already saved
    if save_to_disk and (i % save_every) != 0:
        self.population.save(path=save_path, suffix=str(i))

    # End the status bar
    progress.finish()
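# The while-loop above derives a fresh folder name by bumping a numeric '_<n>'
# suffix. A standalone sketch of that naming scheme (a hypothetical helper, not
# part of the original class); rpartition is used here so that base names which
# already contain underscores still increment correctly:
from os import path

def unique_save_path(save_path):
    """Return save_path, or save_path with '_<n>' appended/incremented until unused."""
    while path.exists(save_path):
        base, sep, num = save_path.rpartition('_')
        if sep and num.isdigit():
            save_path = base + '_' + str(int(num) + 1)
        else:
            save_path = save_path + '_1'
    return save_path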
# #################################################################################
import argparse
import numpy as np
from sklearn.model_selection import StratifiedKFold
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, \
    FileTransferSpeed, FormatLabel, Percentage, \
    ProgressBar, ReverseBar, RotatingMarker, \
    SimpleProgress, Timer

# progress bar settings
widgets = [
    'Progress: ', Percentage(), ' ',
    Bar(marker=RotatingMarker()), ' ',
    ETA()
]

def parseFileName(filepath):
    # return the file name without its extension, e.g. '/a/b/data.csv' -> 'data'
    tokens = filepath.split("/")
    filename = tokens[len(tokens) - 1]
    tokens = filename.split(".")
    filename_no_ext = tokens[len(tokens) - 2]
    return filename_no_ext

def parseOutputFolderPath(filepath):
    # body truncated in the source; a minimal reconstruction (an assumption) that
    # returns the containing folder with a trailing slash, mirroring parseFileName
    tokens = filepath.split("/")
    return "/".join(tokens[:len(tokens) - 1]) + "/"
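# A portable alternative sketch for parseFileName using os.path (not the author's
# code): it handles platform separators, and names containing extra dots keep
# everything before the final extension ('/a/b/data.v2.csv' -> 'data.v2', whereas
# the split-based version above would return 'v2').
import os

def parse_file_name(filepath):
    return os.path.splitext(os.path.basename(filepath))[0]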
def play_random_games(self, num_games, save_to_disk=False, save_every=100, save_path=''):
    '''Simulates a specified number of games, where each game involves two agents
    randomly selected from the population. To periodically save the population to
    disk, pass save_to_disk=True; save_every controls how often to save and
    save_path where. The default is the current working directory.'''

    # Initialize the progress bar
    progress = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=num_games).start()

    if save_to_disk:
        # Make sure the path does not already exist. If it does, a new folder name is
        # created by appending '_n' to save_path, where n is a positive integer one
        # higher than whatever the last instance of save_path was saved as.
        while path.exists(save_path):  # save_path already exists
            try:
                old_file_num = int(save_path[save_path.find('_') + 1:])
                new_file_num = old_file_num + 1
                save_path = save_path[0:save_path.find('_')] + '_' + str(new_file_num)
            except ValueError:
                save_path = save_path + "_1"
        self.folder = save_path
        mkdir(save_path)
        self.population.save(path=save_path, suffix='0')

    # Pre-sample the stimuli
    stimuli = self.color_circle.sample([num_games, 2])

    # Run the games
    for i in range(num_games):
        # Randomly select the agents and the stimuli for a single game
        agent_keys = self.population.get_random_pair()

        # Save the game history
        # self.game_history.append([agent_keys, stimuli[i]])

        # Play the game
        self.play_game(agent_keys, stimuli[i])

        # Update the status bar
        progress.update(i + 1)

        # Save to disk
        if save_to_disk and (i % save_every) == 0:
            self.population.save(path=save_path, suffix=str(i))

    # Save the last iteration if it was not already saved
    if save_to_disk and (i % save_every) != 0:
        self.population.save(path=save_path, suffix=str(i))

    # End the status bar
    progress.finish()
def NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler):
    '''
    Function takes a json file of query results, a list of NETS edges, node and edge metadata dictionaries, and a
    dictionary containing NETS edge information by BIO node. Using these items the function creates the directed
    OWL-NETS abstraction network. Node metadata includes: labels (a list of human readable labels); id (the endpoint
    database identifiers); and bio (the NETS node type). Edge metadata includes: labels (human readable label for the
    edge between two NETS nodes) and id (the ontology concept term used to link the NETS nodes).
    :param results: json file containing the query results from endpoint
    :param NETS_edges: list of lists, where each list is a NETS edge and the order specifies a directional relationship
    :param node_labeler: nested dictionary where keys are bio entity identifiers and the values are the human
    readable labels and database identifiers
    :param node_type: dictionary with BIO node as key and set of NETS node types as value
    :param edge_labeler: dictionary where the keys are the NETS edges and the values are the edge labels
    :return: OWL-NETS directed graph
    '''
    print 'Started building OWL-NETS graph'

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(results['results']['bindings']))

    NETS_graph = nx.DiGraph()

    for res in pbar(results['results']['bindings']):
        for edge in NETS_edges:
            i = res[str(edge[0].strip('?').encode('utf8'))]['value'].encode('utf8')
            j = res[str(edge[1].strip('?').encode('utf8'))]['value'].encode('utf8')

            # set first node in edge
            NETS_graph.add_node(
                min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                labels=node_labeler[edge[0].strip('?')][i]['label'],
                id=node_labeler[edge[0].strip('?')][i]['id'],
                bio=i,
                type='-'.join(list(node_type[i])))

            # set second node in edge
            NETS_graph.add_node(
                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                labels=node_labeler[edge[1].strip('?')][j]['label'],
                id=node_labeler[edge[1].strip('?')][j]['id'],
                bio=j,
                type='-'.join(list(node_type[j])))

            # add edge
            NETS_graph.add_edge(
                min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                labels=res[(edge_labeler[tuple(edge)]['label']).strip('?')]['value'].encode('utf8'),
                id=(edge_labeler[tuple(edge)]['id']).strip('?'),
                edge='-'.join([edge[0].strip('?'), edge[1].strip('?')]))

    # close progress bar
    pbar.finish()
    print 'Finished building OWL-NETS graph'
    print '\n'

    # print information about graph
    print 'Directed OWL-NETS Graph has ' + str(len(NETS_graph.nodes())) + ' nodes, ' + \
          str(len(NETS_graph.edges())) + ' edges, and ' + \
          str(nx.number_connected_components(NETS_graph.to_undirected())) + ' connected component(s)'

    return NETS_graph
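# A minimal usage sketch (assumes `results` and `NETS_edges` are available, the
# node_labeler/node_type dictionaries come from NodeDic, and edge_labeler is the
# edge-label dictionary described in the docstring); the attribute names are the
# ones NETSGraph sets above:
graph = NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler)
for node, data in graph.nodes(data=True):
    print node, data['bio'], data['type']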
time = odata_pt[:, 1]
tindex = abs(time - pf.current_time.v).argmin()

if args.subsample >= 0 and pf.h.max_level - args.undersample < args.subsample:
    print 'ERROR: subsample must be less than the max refinement level minus undersample.'
    sys.exit()

maxval = np.empty(len(args.vars))
minval = np.empty(len(args.vars))
maxval.fill(-float("inf"))
minval.fill(float("inf"))
vals = list()
pbar = ProgressBar(widgets=[
    'Determining histogram bounds and initial pass of data: ',
    Percentage(), Bar(), ' ', ETA()
], maxval=len(pf.index.grids)).start()
for cnt, g in enumerate(pf.index.grids):
    # skip grids above the effective max level, and non-leaf grids below it
    if g.Level > pf.h.max_level - args.undersample:
        continue
    if len(g.Children) != 0 and g.Level != pf.h.max_level - args.undersample:
        continue
    evals = list()
    vvals = list()
    # vvals = g.get_data(args.var).ravel()
    for e, ev in enumerate(args.vars):
        vvals.append(g[ev].ravel())
    dvals = g["dens"].ravel().v * g["c12 "].ravel().v  # note: "c12 " keeps the trailing space of the on-disk field name
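    # The continuation is truncated in the source; a plausible (assumed) next step
    # is updating the running per-variable bounds from this grid's values:
    for e in range(len(args.vars)):
        minval[e] = min(minval[e], vvals[e].min())
        maxval[e] = max(maxval[e], vvals[e].max())
    pbar.update(cnt + 1)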
semester_codes = {'0': 'INTERIM', '1': 'SPRING', '6': 'SUMMER', '9': 'FALL'}

default_pickup = {
    'GEN': 'MUSME',
    'BUS': 'BUS',
    'MM': 'MUSME',
    'HEALTH': 'HLTH',
    'OXF': 'OXFD',
    'CHEM': 'CHEM',
    'THE': 'THEO',
    'LAW': 'LAW'
}

# widget for progress bar
pbar_widget = [Percentage(), ' ', ETA(), Bar()]

def unnone(value):
    # return the value unchanged, or '' if it is None (renamed from `str` to avoid shadowing the built-in)
    return value if value is not None else ''

# get notes by type for later reference so it will not take 2 hours to run 82,000 separate queries
def get_notes(note_type, sep='; '):
    query = '''
        SELECT n.target_id id,
               IFNULL(group_concat(n.note separator %s), '') notes
        FROM notes n
        WHERE n.type = %s
        GROUP BY n.target_id
    '''
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    # the source is truncated here; a minimal reconstruction (an assumption) would
    # execute the query and index the notes by target id:
    cursor.execute(query, (sep, note_type))
    return {row['id']: row['notes'] for row in cursor.fetchall()}
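# A usage sketch (the note type 'CIRC' and the ids below are illustrative, not
# from the source): build the lookup once, then resolve notes per record with no
# further queries.
circ_notes = get_notes('CIRC')
for target_id in [1001, 1002]:
    print(unnone(circ_notes.get(target_id)))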
    idxs_max = np.argsort(rsss)[:CLUSTERKEYSIZE]
    topaps[i] = macs[idxs_max]
    joinaps.append('|'.join(topaps[i]))
    toprss[i] = '|'.join(rsss[idxs_max])
rawrmp[:, col_macs] = np.array(joinaps)
rawrmp[:, col_rsss] = np.array(toprss)
print 'Done'

# Clustering heuristics.
fp_field_names = FP_FIELD_NAMES['outdoor'] if 'lat' in csv_cols else FP_FIELD_NAMES['indoor']
idxs_fp = [csv_cols[col] for col in fp_field_names]
idx_time = fp_field_names.index('time')
idx_rsss = fp_field_names.index('rsss')
n_inserts = {'n_newcids': 0, 'n_newfps': 0}
if verb:
    widgets = ['Incr-Clustering: ', Percentage(), ' ', Bar(marker=RotatingMarker())]
    pbar = ProgressBar(widgets=widgets, maxval=num_rows * 10).start()
for idx, wlanmacs in enumerate(topaps):
    # Drop FPs with no wlan info.
    if not wlanmacs[0].strip() and len(wlanmacs) == 1:
        continue
    fp = rawrmp[idx, idxs_fp]
    found_cluster, result = search_cluster(macs=wlanmacs, fp=fp, wppdb=wppdb, idx_rsss=idx_rsss)
    fp = result['fp']
    # Strip time & rsss fields.
    fp[idx_time] = fp[idx_time].replace(' ', '')
    fp[idx_rsss] = fp[idx_rsss].replace(' ', '')
    if not found_cluster:
        # Insert into cidaps/cfps with a new cluster id.
        new_cid = wppdb.addCluster(result['fp_macs'])
        wppdb.addFps(cid=new_cid, fps=[fp])
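# Selection note: np.argsort sorts ascending. If the RSS readings were numeric
# dBm values, a strongest-first selection would negate before sorting. A small
# self-contained sketch (illustrative values, separate from the pipeline above):
import numpy as np

rss = np.array([-71.0, -45.0, -90.0, -60.0])
top2 = np.argsort(-rss)[:2]  # indices of the two strongest readings: [1, 3]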