def drop(self, databases):
    """Drop every database the user selected in listbox LB1, then refresh the UI.

    Args:
        databases: Sequence of database entries, indexed in the same order
            as the rows shown in ``self.LB1``.

    Side effects: calls ``database.drop`` on the selected entries, refreshes
    the directory listing, and pops an "ok" info dialog.
    """
    # curselection() returns listbox row indices as strings; map each one
    # to its database entry.  (The original also built `map(int, ...)` into
    # an unused `items` variable — removed.)
    selected = [databases[int(idx)] for idx in self.LB1.curselection()]
    database.drop(selected)
    self.list_directory_refresh()
    infoCallBack("ok")
def rescan():
    """Rebuild the track/folder index from scratch by rescanning root_dir."""
    # Snapshot the currently known tracks BEFORE the database is dropped,
    # so the scanner can diff against the previous state.
    indexer = Scanner()
    previous = Track().getAllByPath(root_dir, '')
    indexer.existing_tracks = Track.indexByPath(previous)

    # Reset the schema: drop everything, reconnect, recreate the tables.
    database.drop()
    database.connect()
    Track().create_table()
    Folder().create_table()

    # Walk the media root and repopulate the fresh tables.
    indexer.parse(root_dir)
                    help='Get all pages')
args = parser.parse_args()
# print args

# Map CLI verbosity flags onto the logger level.
if args.debug:
    logger.setLevel(logging.DEBUG)
elif args.verbose:
    logger.setLevel(logging.INFO)
    # NOTE(review): with a counted -v flag this means a single -v gives
    # WARNING and -vv (or more) gives INFO — confirm that is the intent,
    # as WARNING is *less* verbose than INFO.
    if args.verbose == 1:
        logger.setLevel(logging.WARNING)
if args.quiet:
    logger.setLevel(logging.CRITICAL)

# Optional destructive maintenance actions.
if args.drop:
    database.drop()
if args.create:
    database.create()

# Normal processing path — skipped entirely when --drop was requested.
if not args.drop:
    items_to_process = []
    if args.update:
        # Read the item list from the update file, then enrich it.
        items_to_update = update.read_items_to_update(args.upfile)
        # logger.debug(items_to_update)
        items_to_process = get_items_with_info(items_to_update)
    else:
        # Items were given directly on the command line.
        items_to_process = get_items_from_cli(args.items)
    save_authority_codes(items=items_to_process)
def similarity_classifier_knn(db, test, nncl, **kwargs):
    """Compares the test point with the database and returns a pre-defined number
    of best matches, according to the chosen similarity metrics.

    Args:
        db (pandas df): Reference database to use for the search
        test (pandas df): Test cluster
        nncl (int): Number of total clusters
        kwargs metrics (string): 'Label1' to use probability or 'Label2' to use
            similarity to find best matches
        kwargs flatten (float): n-root to take for all the values in the list of
            best matches. Makes the difference between the first guess and the
            last guess smaller
        kwargs first_values (int): How many of the best guesses to keep in the
            list to return
        kwargs function (string): Similarity function. 'cosine', 'euclidean',
            'correlation' and 'manhattan' are accepted.

    Returns:
        pandas dataframe: List of the N most probable locations, with coordinates
    """
    # Resolve optional keyword arguments with their defaults.
    metrics = kwargs['metrics'] if 'metrics' in kwargs else 'Label1'
    flatten = kwargs['flatten'] if 'flatten' in kwargs else 1.0
    first_values = kwargs['first_values'] if 'first_values' in kwargs else 10
    function = kwargs['function'] if 'function' in kwargs else 'cosine'

    # Strip coordinate columns so only feature columns (plus the label column)
    # take part in the similarity computation.
    db_sparse = db.drop(columns=['cLat','cLon','rLat','rLon'])#,'Label1' if metrics == 'Label2' else 'Label2'])
    test = test.drop(['cLat','cLon','rLat','rLon','Label1'])#,'Label2'])
    v_test = test.tolist()

    #calculate the similarity for every point in the comparison database
    # One bucket of similarity scores per cluster id (0..nncl inclusive).
    sim = [[] for i in range(0,nncl+1)]
    for i, row in db_sparse.iterrows():
        # The label column selected by `metrics` identifies the row's cluster.
        label = int(row[metrics])
        row = row.drop(metrics)
        #create vector
        v1 = row.tolist()
        if(function == 'cosine'):
            sim[label].append(cosine_similarity(v1,v_test))
        elif(function == 'euclidean'):
            sim[label].append(euclidean_similarity(v1,v_test))
        elif(function == 'correlation'):
            sim[label].append(correlation_similarity(v1,v_test))
        elif(function == 'manhattan'):
            sim[label].append(manhattan_similarity(v1,v_test))
        else:
            # NOTE(review): an unknown `function` prints this once per row and
            # the function falls through with empty similarity buckets —
            # consider raising ValueError before the loop instead.
            print("ERROR: Please specify a correct classification function. \nAccepted functions: 'cosine', 'euclidean', 'manhattan', correlation'")
    #list the most probable clusters with similarity index
    # Per-cluster (mean, variance) of similarity; clusters with no samples get (0, 0).
    cluster_sim = []
    for c in sim:
        if len(c) > 0:
            cluster_sim.append((np.mean(c),np.var(c)))
        else:
            cluster_sim.append((0,0))
    cluster_stat = pd.DataFrame(data=cluster_sim,columns=['Mean Similarity','Variance'])
    # Best-matching clusters first; the DataFrame index keeps the cluster ids.
    cluster_stat.sort_values(by='Mean Similarity',ascending=False,inplace=True)
    #ignore warnings in the next part
    # (head is a view-like slice; the assignments below trigger
    # SettingWithCopyWarning, which is silenced here.)
    warnings.filterwarnings("ignore")
    #rescale and norm for the first 10 clusters --> probabilities
    # Take one extra row: after min-max rescaling the worst row becomes
    # exactly 0 and is dropped below, leaving `first_values` rows.
    head = cluster_stat.head(first_values+1)
    # Min-max rescale, then flatten the distribution by taking the 1/flatten root.
    head['rescaled'] = ((head.loc[:,'Mean Similarity'] - head.loc[:,'Mean Similarity'].min()) / (head.loc[:,'Mean Similarity'].max()-head.loc[:,'Mean Similarity'].min()))**(1/flatten)
    # Normalize to a probability distribution over the kept clusters.
    head['Probability'] = head.loc[:,'rescaled'] / head.loc[:,'rescaled'].sum()
    head.drop(columns='rescaled',inplace=True)
    head.drop(head.index[first_values],inplace=True) #drop last index with probability 0
    #print(head)
    #add coordinates of center
    # Look up the center coordinates of each surviving cluster in the full db.
    center_coords = []
    for index in head.index.values:
        # NOTE(review): `.loc[1, ...]` reads the SECOND matching row after
        # reset_index — confirm this is intentional (vs. `.loc[0, ...]`);
        # it also implies every cluster must have at least two rows in db.
        c = db.loc[np.isclose(db[metrics],index)].reset_index(drop=True).loc[1,['cLat','cLon']]
        #print("Cluster {} - {} {}".format(index,c.loc['cLat'],c.loc['cLon']))
        center_coords.append((c.loc['cLat'],c.loc['cLon']))
    # Move the cluster id out of the index so it survives the concat,
    # then attach the coordinates column-wise.
    head = head.reset_index()
    coords_pd = pd.DataFrame(data=center_coords,columns=['Lat','Lon'])
    best_with_coords = pd.concat([head,coords_pd],axis=1)
    best_with_coords.rename(index=str, columns={"index":"Cluster ID"},inplace=True)
    return best_with_coords
    # Table already existed; skip creation. (This print presumably sits in the
    # else-branch of a create-table check whose header is outside this view —
    # `names` comes from that surrounding code.)
    print(f"SKIPPING - TABLE ALREADY EXISTS - {names} \n")

# Demo data: (name, email, phone, age) rows for the first insert batch.
dataList = [
    ('Sipra Banerjee', '*****@*****.**', 9831790182, 55),
    ('Subir Banerjee', '*****@*****.**', 9831626265, 62),
    ('Swapna Mitra', '*****@*****.**', 9433851479, 57),
    ('Subhadeep Banerjee', '*****@*****.**', 7980207055, 21)
]
database.insertData(TABLENAME, dataList=dataList)
database.showAll(TABLENAME)

# Second batch; note the first row duplicates an entry from the batch above
# (presumably to demonstrate duplicate handling — confirm).
dataList = [
    ('Subhadeep Banerjee', '*****@*****.**', 7980207055, 21),
    ('Ria Gupta', '*****@*****.**', 5123456891, 22),
    ('Souma Mitra', '*****@*****.**', 6444848265, 21),
]
database.insertData(TABLENAME, dataList=dataList)
database.showAll(TABLENAME)

# Delete one specific row, show the remaining rows, then drop the whole table.
data = ('Ria Gupta', '*****@*****.**', 5123456891, 22)
database.deleteOne(TABLENAME, data)
print("\nAfter Deleting\n")
database.showAll(TABLENAME)
database.drop(TABLENAME)
print(f"Remaining Tables - {database.listTables()}")