def main():
    try:
        file = sys.argv[1] if len(sys.argv) > 1 else "./models/c101.txt"
        print("Running for instance " + file)
        instance = Instance(readFile(file), 25)

        print("Generating routes...")
        measure(lambda: instance.generateRoutes(10000))

        # Create a new model
        m = Model("roteamento")

        R = [x for x in range(len(instance.routes))]
        J = [x for x in range(1, len(instance.customers))]
        x = {}
        routes = instance.routes

        # Create variables
        for i in R:
            x[i] = m.addVar(lb=0, ub=1, vtype="I", name="x%s" % (i))
        m.update()

        # Set objective
        m.setObjective(quicksum(routes[i].distance * x[i] for i in R), GRB.MINIMIZE)
        m.update()

        # Add constraints: every customer is covered by exactly one chosen route
        for i in range(1, len(instance.customers)):
            m.addConstr(quicksum(int(routes[j].isInRoute(i)) * x[j] for j in R) == 1,
                        "Cliente esta na rota x%s" % (i))
        m.update()

        # At most `instance.vehicles` routes may be selected
        m.addConstr(quicksum(x[i] for i in R) <= instance.vehicles, "Max veiculos")
        m.update()

        # Optimize model
        m.write("Modelo.lp")
        m.optimize()

        resultado = []
        soma = 0
        for i in range(len(m.getVars())):
            if m.getVars()[i].x > 0:
                resultado.append(i)
                soma += instance.routes[i].distance

        print("\nRoutes:")
        for r in resultado:
            print(instance.routes[r].getId())
        print("\nDistance:", soma)

    except GurobiError as e:
        print('Error code ' + str(e.errno) + ": " + str(e))
    except AttributeError as e:
        print('Encountered an attribute error', e)
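# For reference, main() builds a set-partitioning model over the pre-generated routes.
# The symbols below are only a summary of the code above, not new identifiers:
#
#   minimize    sum_r d_r * x_r              where d_r = routes[r].distance
#   subject to  sum_r a_ir * x_r == 1        for every customer i,
#                                            with a_ir = 1 if routes[r].isInRoute(i)
#               sum_r x_r <= K               where K = instance.vehicles
#               x_r in {0, 1}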
def closest_node_in_meters(node, nodes):
    nodes = np.asarray(nodes)
    lat1 = node[0]
    lon1 = node[1]
    dist_2 = []
    for existingNode in nodes:
        lat2 = existingNode[0]
        lon2 = existingNode[1]
        dist_2.append(measure(lat1, lon1, lat2, lon2))
    dist_2 = np.array(dist_2)
    return np.argmin(dist_2), np.min(dist_2)
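# Minimal usage sketch for closest_node_in_meters, kept as comments because the
# coordinates are made up for illustration. It assumes measure(lat1, lon1, lat2, lon2)
# returns the distance between two WGS84 points in meters, as elsewhere in this code:
#
# stops = [(55.6761, 12.5683), (55.6790, 12.5700), (55.6730, 12.5610)]
# idx, dist_m = closest_node_in_meters((55.6755, 12.5675), stops)
# print(idx, dist_m)  # index of the nearest stored point and its distance in meters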
def splitData(gps, numFrames, maxDistanceBetweenPoints):
    goodSequences = []
    for year in gps:
        lats = gps[year]['lat']
        lons = gps[year]['lon']
        panoids = gps[year]['panoid']
        orientation = gps[year]['orientation']
        tmpSequence = []
        # We have to check whether there are multiple observations from a given year.
        if isinstance(lats, list):
            for i in range(1, len(lats)):
                distanceFromPreviousPoint = measure(lats[i - 1], lons[i - 1], lats[i], lons[i])
                if distanceFromPreviousPoint < maxDistanceBetweenPoints:
                    # Temporarily append the previous point to the current candidate sequence.
                    tmpSequence.append([year, lats[i - 1], lons[i - 1], panoids[i - 1], orientation[i - 1]])
                    if len(tmpSequence) == numFrames:
                        # The sequence is long enough for this split, so keep it and start a new one.
                        goodSequences.append(tmpSequence)
                        tmpSequence = []
                else:
                    # Since the points are ordered, a gap this large means no sufficiently
                    # long sequence can be built from here, so we reset.
                    tmpSequence = []
    return goodSequences
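# Usage sketch for splitData; the structure of `gps` below is inferred from the
# field accesses in the function and the values are illustrative only:
#
# gps = {
#     '2019': {'lat': [...], 'lon': [...], 'panoid': [...], 'orientation': [...]},
#     '2021': {'lat': [...], 'lon': [...], 'panoid': [...], 'orientation': [...]},
# }
# sequences = splitData(gps, numFrames=10, maxDistanceBetweenPoints=20)
# # Each returned sequence is a list of [year, lat, lon, panoid, orientation] entries
# # in which consecutive points are less than 20 m apart.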
def findSequencesWithDifferentTimeAndSamePlace(sequenceSplits, numDates,
                                                maxDistanceBetweenStartingPoints,
                                                usedPanoids, numFrame):
    # For each sequence, look through all the sequences with different dates and
    # group together those whose starting points are close to each other.
    finalSequences = []
    for outer in range(len(sequenceSplits)):
        unique = True
        for i in range(len(sequenceSplits[outer])):
            panoidOuter = sequenceSplits[outer][i][3]
            if panoidOuter in usedPanoids:
                unique = False
        if unique:
            yearOfOuter = sequenceSplits[outer][0][0]
            firstLatOuter = sequenceSplits[outer][0][1]
            firstLonOuter = sequenceSplits[outer][0][2]
            tmpUsedPanoids = []
            for i in range(len(sequenceSplits[outer])):
                panoidOuter = sequenceSplits[outer][i][3]
                tmpUsedPanoids.append(panoidOuter)
            tmp = []
            usedYears = []
            usedYears.append(yearOfOuter)
            # We might as well append the outer sequence already: either we look everything
            # through without finding a matching sequence, in which case the group is simply
            # discarded, or we do find matches and need it anyway.
            tmp.append(sequenceSplits[outer])
            for inner in range(len(sequenceSplits)):
                yearOfInner = sequenceSplits[inner][0][0]
                # If any of the panoids are already used, we don't want to use the sequence.
                unique = True
                for i in range(len(sequenceSplits[inner])):
                    panoidInner = sequenceSplits[inner][i][3]
                    if panoidInner in usedPanoids:
                        unique = False
                    if panoidInner == "HCB210nwx_o9tMyHJ-rhfQ" and unique != False:
                        stop = True  # debugging hook; `stop` is otherwise unused
                # Only unique path combinations are used; this condition avoids duplicates.
                if unique:
                    if numFrame == 20:
                        stop = True  # debugging hook
                    # If the two sequences have different dates
                    if yearOfInner not in usedYears:
                        firstLatInner = sequenceSplits[inner][0][1]
                        firstLonInner = sequenceSplits[inner][0][2]
                        distance = measure(firstLatInner, firstLonInner, firstLatOuter, firstLonOuter)
                        # and the distance between their first observations is small enough,
                        if distance < maxDistanceBetweenStartingPoints:
                            tmp.append(sequenceSplits[inner])
                            usedYears.append(yearOfInner)
                            for i in range(len(sequenceSplits[inner])):
                                panoidInner = sequenceSplits[inner][i][3]
                                tmpUsedPanoids.append(panoidInner)
                            if len(tmp) == numDates:
                                finalSequences.append(tmp)
                                tmp = []
                                for panoid in tmpUsedPanoids:
                                    usedPanoids.append(panoid)
                                tmpUsedPanoids = []
    return finalSequences, usedPanoids
def mainFunction(cityName):
    print(cityName)
    continueWithCity = True
    plotInterpolationResults = False
    plotPanoidsPositionResults = False
    plotChosenSequences = False
    download = True
    testing = False
    chains = ["mcdonalds", "cinema", "stadium"]
    baseDirectory = '/home/frederik/Desktop/dataset/{}/'.format(cityName)

    xxs = np.linspace(-0.1, 0.1, 100)
    for xx in xxs:
        cityCoords = [[19.409084 + xx, -99.145136 + xx],
                      [42.357978 + xx, -71.060782 + xx],
                      [40.414523 + xx, -3.705759 + xx],
                      [51.521473 + xx, -0.105750 + xx],
                      [48.852287 + xx, 2.346714 + xx],
                      [-26.178069 + xx, 28.056847 + xx],
                      [37.879586 + xx, -122.270670 + xx],
                      [-22.896383 + xx, -43.277966 + xx],
                      [35.67332 + xx, 139.775610 + xx],
                      [-33.898333 + xx, 151.099035 + xx]]

        if cityName == 'Mexico City':
            cityCoord = cityCoords[0]
            apiKey = 'AIzaSyDi0_vwJYxxAWk_bHJnYb7DTvPYpk2YzGs'
        elif cityName == 'Boston':
            cityCoord = cityCoords[1]
            apiKey = 'AIzaSyDm-RuA_Fu6rzebvaN5CuD4znZvh4-x-Cc'
        elif cityName == 'Madrid':
            cityCoord = cityCoords[2]
            apiKey = 'AIzaSyDi0_vwJYxxAWk_bHJnYb7DTvPYpk2YzGs'
        elif cityName == 'London':
            cityCoord = cityCoords[3]
            apiKey = 'AIzaSyDXmwm0xlivvdNj2JB2aBMSCCCmD3sSW9g'
        elif cityName == 'Paris':
            cityCoord = cityCoords[4]
            apiKey = 'AIzaSyDXmwm0xlivvdNj2JB2aBMSCCCmD3sSW9g'
        elif cityName == 'Johannesburg':
            cityCoord = cityCoords[5]
            apiKey = 'AIzaSyDi0_vwJYxxAWk_bHJnYb7DTvPYpk2YzGs'
        elif cityName == "San Francisco":
            apiKey = 'AIzaSyDm-RuA_Fu6rzebvaN5CuD4znZvh4-x-Cc'
            cityCoord = cityCoords[6]
        elif cityName == "Rio de Janeiro":
            apiKey = 'AIzaSyBXldN3BcrGE3aTsbkHjoPCPa4RDlHatHE'
            cityCoord = cityCoords[7]
        elif cityName == "Tokyo":
            apiKey = 'AIzaSyCVM-TJNV4rU_i1tuq6enXi-ySYH4ruIl4'
            cityCoord = cityCoords[8]
        elif cityName == "Sydney":
            apiKey = 'AIzaSyDm-RuA_Fu6rzebvaN5CuD4znZvh4-x-Cc'
            cityCoord = cityCoords[9]

        lat = cityCoord[0]
        lon = cityCoord[1]

        numFrames = np.array(range(30, 4, -1))  # max 30 frames and min 5 frames
        numDates = 4
        maxDistanceBetweenPoints = 20  # m
        maxDistanceBetweenStartingPoints = 30  # m

        # This enables us to continue with the same city even though the program has been stopped.
        if continueWithCity:
            uniqueLabelCounter, totalDownloadedSequences, totalDownloadedSequencesKm, \
                totalDownloadedSequencesPerLength = getCityInfo(baseDirectory)
            usedPanoids = getUsedPanoids(baseDirectory)
        else:
            uniqueLabelCounter = 0  # Used to label data such that each point within a radius of 15 m gets the same label.
            totalDownloadedSequencesKm = 0  # How many km we have downloaded
            totalDownloadedSequences = 0  # How many sequences we have downloaded
            # How many sequences per length: e.g. 1 sequence of 5 frames, 2 sequences of 6 frames, ...
            totalDownloadedSequencesPerLength = np.zeros(len(numFrames))
            usedPanoids = []

        routePoints = generateRoutes(chains, lat, lon, apiKey)
        print(cityName)
        for i, from_ in enumerate(routePoints):
            for j, to_ in enumerate(routePoints):
                if i != j:
                    # The distance between the points must be more than 1 km.
                    if measure(from_[0], from_[1], to_[0], to_[1]) > 1000:
                        # This enables us to continue with the same city even though the program has been stopped.
                        currentFolders = os.listdir(baseDirectory)
                        if 'from: {0} to: {1}'.format(str(from_), str(to_)) not in currentFolders:
                            print(i, j, len(routePoints), cityName)
                            #t0 = time.time()
                            #print("Downloading street view images from " + str(from_) + " to " + str(to_))
                            directory = baseDirectory + 'from: {0} to: {1}/'.format(str(from_), str(to_))
                            os.makedirs(directory)

                            if not testing:
                                lats, lons, pos = getGeoCoordinates(from_, to_, apiKey)
                                labelPos = pos[:, :2]
                                labels = list(range(uniqueLabelCounter, uniqueLabelCounter + len(labelPos)))
                                uniqueLabelCounter += len(labels)
                                #t1 = time.time()
                                #print("We have the geo-coordinates of this route. It took " + str(t1 - t0) + " seconds")

                                if plotInterpolationResults:
                                    plt = plotInterpolation(pos, lats, lons, True)
                                    plt.show()

                                #######
                                # Note that it takes a long time to get the panoids...
                                ######
                                pointsOfInterest = getPointsOfInterest(pos)
                                #t2 = time.time()
                                #print("We have the points of interest of this route. It took " + str(t2 - t1) + " seconds")

                                ###########
                                # Sort by date
                                ##########
                                gps = sortPointsOfInterestByDate(pointsOfInterest, pos)
                                if plotPanoidsPositionResults:
                                    plotPanoidsPosition(gps, pos)
                                #t3 = time.time()
                                #print("We have sorted the points of this route. It took " + str(t3 - t2) + " seconds")

                            # For testing and debugging
                            #else:
                            #    gps = getGPS()

                            allSequences = []
                            for i, numFrame in enumerate(numFrames):
                                #########
                                # Get sequences
                                #########
                                sequences, usedPanoids = getSequences(gps, numFrame, numDates,
                                                                      maxDistanceBetweenPoints,
                                                                      maxDistanceBetweenStartingPoints,
                                                                      usedPanoids)
                                totalDownloadedSequencesPerLength[i] = totalDownloadedSequencesPerLength[i] + len(sequences)
                                for sequence in sequences:
                                    allSequences.append(sequence)
                            #t4 = time.time()
                            #if not testing:
                            #    print("We have gotten the sequences of this route. It took " + str(t4 - t3) + " seconds")

                            #############
                            # Download images
                            ##############
                            displayResults(allSequences, download, directory, apiKey,
                                           cityCoord, labels, labelPos, cityName)
                            #t5 = time.time()
                            #print("We have downloaded the images of this route. It took " + str(t5 - t4) + " seconds")
                            #print("The total time of this route was " + str(t5 - t0))
                            totalDownloadedSequences += len(allSequences)
                            distance, distancesOfEachSequence, framesOfEachSequence = \
                                calculateDistanceOfSequences(allSequences)
                            totalDownloadedSequencesKm += distance

                            print(cityName)
                            print("We have now downloaded a total of " + str(totalDownloadedSequences) + " sequences!")
                            print("We have now downloaded a total of " + str(totalDownloadedSequencesKm) + " km of sequences!")
                            for i, count in enumerate(totalDownloadedSequencesPerLength):
                                print("There are " + str(count) + " sequences of "
                                      + str((len(totalDownloadedSequencesPerLength) - i) + 4) + " frames")
                            print()

                            dumpUsedPanoids(usedPanoids, baseDirectory)
                            dumpCityInfo(uniqueLabelCounter, totalDownloadedSequences,
                                         totalDownloadedSequencesKm,
                                         totalDownloadedSequencesPerLength, baseDirectory)

                            # Create a summary file for this route
                            summaryArray = {}
                            summaryArray['name'] = 'from: {0} to: {1}'.format(str(from_), str(to_))
                            summaryArray['numberOfSequenceSets'] = len(allSequences)
                            summaryArray['totalKm'] = distance
                            sequenceDetails = {}
                            for idx, sequenceSet in enumerate(allSequences):
                                dates = getYearsOfSequenceSet(directory + 'sequenceSet{}/dates.txt'.format(idx))
                                context = {}
                                context['frames'] = framesOfEachSequence[idx]
                                context['km'] = distancesOfEachSequence[idx]
                                context['dates'] = dates
                                sequenceDetails["sequence{}".format(idx)] = context
                            summaryArray['sequenceSets'] = sequenceDetails

                            # Dump the result to a json file
                            with open(directory + '/description.json', 'w') as fp:
                                json.dump(summaryArray, fp, indent=4)
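# For reference, the description.json written above has roughly this shape;
# all values are illustrative, and the format of "dates" depends on getYearsOfSequenceSet:
# {
#   "name": "from: [lat, lon] to: [lat, lon]",
#   "numberOfSequenceSets": 2,
#   "totalKm": 0.85,
#   "sequenceSets": {
#     "sequence0": {"frames": 12, "km": 0.45, "dates": "..."},
#     "sequence1": {"frames": 10, "km": 0.40, "dates": "..."}
#   }
# }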
              annealing=False,
              batch_size=batch_size,
              prefix=PREFIX,
              label=label,
              scale=config['scale'],
              patience=config['patience'])

# res_file = PREFIX + '_res.h5'
# res_data = h5py.File(name=res_file, mode='r')
# dim2 = res_data['RES5']
# print(np.max(dim2))
print(res.shape)

k = len(np.unique(label))
cl, _ = clustering(res, k=k)
dm = measure(cl, label)
# res_data.close()

### analysis results
# plot loss
# plot 2-D visualization
fig = print_2D(points=res, label=label, id_map=id_map)
# fig.savefig('embryo.eps')
# fig = print_2D(points=res_data['RES5'], label=label, id_map=id_map)
# fig.show()
# res_data.close()
# time.sleep(30)
# res_data.close()

# plot NMI,ARI curve
#
# How many sequences per length: e.g. 1 sequence of 5 frames, 2 sequences of 6 frames, ...
totalDownloadedSequencesPerLength = np.zeros(len(numFrames))
usedPanoids = []

for city in cities:
    lat = city[0]
    lon = city[1]
    routePoints = generateRoutes(chains, lat, lon, apiKey)
    print(routePoints)
    for i, from_ in enumerate(routePoints):
        for j, to_ in enumerate(routePoints):
            if i != j:
                # The distance between the points must be more than 1 km.
                if measure(from_[0], from_[1], to_[0], to_[1]) > 1000:
                    # This enables us to continue with the same city even though the program has been stopped.
                    currentFolders = os.listdir(baseDirectory)
                    if 'from: {0} to: {1}'.format(str(from_), str(to_)) not in currentFolders:
                        t0 = time.time()
                        print("Downloading street view images from " + str(from_) + " to " + str(to_))
                        directory = baseDirectory + 'from: {0} to: {1}/'.format(str(from_), str(to_))
measure(main)
def vasc(expr,
         epoch=5000,
         latent=2,
         patience=50,
         min_stop=500,
         batch_size=32,
         var=False,
         prefix='test',
         label=None,
         log=True,
         scale=True,
         annealing=False,
         tau0=1.0,
         min_tau=0.5,
         rep=0):
    '''
    VASC: variational autoencoder for scRNA-seq datasets

    ============
    Parameters:
        expr: expression matrix (n_cells * n_features)
        epoch: maximum number of epochs, default 5000
        latent: dimension of latent variables, default 2
        patience: stop if the loss shows an insignificant decrease within *patience* epochs, default 50
        min_stop: minimum number of epochs, default 500
        batch_size: batch size for stochastic optimization, default 32
        var: whether to estimate the variance parameters, default False
        prefix: prefix used to store the results, default 'test'
        label: numpy array of true labels, default None
        log: whether log-transformation should be performed, default True
        scale: whether scaling (making values lie within [0,1]) should be performed, default True
        annealing: whether annealing should be performed for the Gumbel approximation, default False
        tau0: initial temperature for annealing, or the fixed temperature without annealing, default 1.0
        min_tau: minimal tau during annealing, default 0.5
        rep: not used

    =============
    Values:
        point: dimension-*latent* embedding of the cells

        A file named *prefix*_*latent*_res.h5 is also written; we prefer to analyse
        results from this file rather than from the single return value. It includes
        the following keys:
            POINTS: all intermediate latent results during the iterations
            LOSS: loss values during the training procedure
            RES*i*: i from 0 to 14 - hidden values, just for reference

        We recommend using POINTS and LOSS to select the final result according to
        the user's preference.
    '''
    expr[expr < 0] = 0.0

    if log:
        expr = np.log2(expr + 1)
    if scale:
        for i in range(expr.shape[0]):
            expr[i, :] = expr[i, :] / np.max(expr[i, :])

    # if outliers:
    #     o = outliers_detection(expr)
    #     expr = expr[o == 1, :]
    #     if label is not None:
    #         label = label[o == 1]

    if rep > 0:
        expr_train = np.matlib.repmat(expr, rep, 1)
    else:
        expr_train = np.copy(expr)

    vae_ = VASC(in_dim=expr.shape[1], latent=latent, var=var)
    vae_.vaeBuild()
    # print_summary(vae_.vae)

    points = []
    loss = []
    prev_loss = np.inf
    # tau0 = 1.
    tau = tau0
    # min_tau = 0.5
    anneal_rate = 0.0003

    for e in range(epoch):
        cur_loss = prev_loss

        # mask = np.ones(expr_train.shape, dtype='float32')
        # mask[expr_train == 0] = 0.0

        if e % 100 == 0 and annealing:
            tau = max(tau0 * np.exp(-anneal_rate * e), min_tau)
            print(tau)

        tau_in = np.ones(expr_train.shape, dtype='float32') * tau
        # print(tau_in.shape)

        loss_ = vae_.vae.fit([expr_train, tau_in], expr_train,
                             epochs=1,
                             batch_size=batch_size,
                             shuffle=True,
                             verbose=0)
        train_loss = loss_.history['loss'][0]
        cur_loss = min(train_loss, cur_loss)
        loss.append(train_loss)
        # val_loss = -loss.history['val_loss'][0]

        res = vae_.ae.predict([expr, tau_in])
        points.append(res[5])

        if label is not None:
            k = len(np.unique(label))

        if e % patience == 1:
            print("Epoch %d/%d" % (e + 1, epoch))
            print("Loss:" + str(train_loss))
            if abs(cur_loss - prev_loss) < 1 and e > min_stop:
                break
            prev_loss = train_loss
            if label is not None:
                try:
                    cl, _ = clustering(res[5], k=k)
                    measure(cl, label)
                except:
                    print('Clustering error')

    ### analysis results
    # cluster_res = np.asarray(cluster_res)
    points = np.asarray(points)
    aux_res = h5py.File(prefix + '_' + str(latent) + '_res.h5', mode='w')
    # aux_res.create_dataset(name='EXPR', data=expr)
    # aux_res.create_dataset(name='CLUSTER', data=cluster_res)
    aux_res.create_dataset(name='POINTS', data=points)
    aux_res.create_dataset(name='LOSS', data=loss)
    count = 0
    for r in res:
        aux_res.create_dataset(name='RES' + str(count), data=r)
        count += 1
    aux_res.close()

    return res[5]
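# Minimal usage sketch for vasc(), kept as comments so the module stays import-safe.
# The file names and shapes are hypothetical; `clustering` and `measure` are the same
# helpers referenced above:
#
# expr = np.loadtxt('expr.txt')          # n_cells x n_genes, non-negative
# label = np.loadtxt('label.txt')        # optional true labels for monitoring
# emb = vasc(expr, latent=2, label=label, prefix='demo', annealing=True)
# cl, _ = clustering(emb, k=len(np.unique(label)))
# print(measure(cl, label))              # clustering agreement vs. the true labels
# # Intermediate embeddings and the loss curve are written to demo_2_res.h5 (POINTS, LOSS).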