def upload_to_s3(self, images):
    """Upload three face views to S3, index them, and notify the user.

    ``images[0]``, ``images[1]`` and ``images[2]`` are JPEG-encoded with
    OpenCV and stored publicly readable in ``BUCKET`` under keys built from
    the OS login name plus a per-view suffix. The keys are then passed to
    ``Index.index_faces_keys`` and a message box reports completion.

    Raises:
        IndexError: if ``images`` holds fewer than three entries.
    """
    s3_client = boto3.client('s3')
    bucket = BUCKET
    user = os.getlogin()
    # NOTE(review): the right-view suffix has a trailing underscore, unlike
    # the other two. Kept unchanged because altering it changes the object key.
    suffixes = ('_Left', '_Front', '_Right_')

    keys = []
    for position, suffix in enumerate(suffixes):
        key = user + suffix + '.jpg'
        # tobytes() replaces the tostring() alias removed in NumPy 2.0.
        payload = cv2.imencode('.jpg', images[position])[1].tobytes()
        s3_client.put_object(ACL='public-read', Bucket=bucket, Key=key,
                             Body=payload, ContentType='image/jpeg')
        keys.append(key)

    Index.index_faces_keys(keys)
    wx.MessageBox('Upload Finished, Please Exit.', 'Result', wx.OK)
def checkbox_click():
    """Sync the paper's tag list with the checkbox state.

    Uses ``box`` and ``self`` from the enclosing scope. A checked box adds
    its text to ``self.paper['tags']``; an unchecked box removes it. When
    the list changes, the index JSON is saved and the owner view refreshed.
    """
    if box.isChecked() == True:
        tags = self.paper.setdefault('tags', [])
        if box.text() in tags:
            return
        tags.append(box.text())
    else:
        print('checkbox', box.text(), 'for', self.path, 'is false')
        tags = self.paper.setdefault('tags', [])
        if box.text() not in tags:
            return
        tags.remove(box.text())
    # Reached only when the tag list was modified.
    Index.save_json(Index.gJSONfilename)
    self.owner.copy_sort_update()
def search(self, query, limit):
    """Rank all indexed entries by their distance to ``query``.

    Every (id, feature) row is read from the index, the stored feature
    string is parsed back into floats, and ``self.calc_distance`` is applied
    against ``query``. Intermediate results are printed for inspection.

    Args:
        query: feature vector of the query, passed to ``calc_distance``.
        limit: maximum number of results to return.

    Returns:
        A list of ``(id, distance)`` tuples sorted by ascending distance
        (smaller means more relevant), truncated to ``limit`` entries.
    """
    index_obj = Index()
    distances = {}
    for fid, feature in index_obj.read_all_features_from_Index():
        # Features are stored as a string-formatted list: '[v1,v2,v3]'.
        features = [float(x) for x in feature.strip('[]').split(',')]
        distances[fid] = self.calc_distance(features, query)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("all distances from query as dict:")
    print(distances)

    ranked = sorted(distances.items(), key=operator.itemgetter(1))
    print("sorted distances as list of tuple:")
    print(ranked)

    return ranked[:limit]
def test11(self):
    """Read parameter-table values for indices declared with external names."""
    pcraster.setclone("clone.map")

    def scalar_from_table(name, indices):
        # Collection whose values come from parameterFile.tbl as scalars.
        return vcMod.VariableCollection(
            indices,
            value=vcMod.ValueFromParameterTable(name, "parameterFile.tbl",
                                                pcraster.Scalar))

    Plants = Index.Index(["TG=TallGrass", "SG=ShortGrass"])
    PlantsAvailable = scalar_from_table("PlantsAvailable", [Plants])
    self.assertEqual(PlantsAvailable[Plants.TG], 1.3)
    self.assertEqual(PlantsAvailable[Plants.SG], 4.5)

    Herbivores = Index.Index([
        "Cow=CowWithLongName",
        "Horse=HorseWithLongName",
        "Sheep=SheepWithLongName",
    ])
    InteractionExt = scalar_from_table("InteractionExt", [Herbivores, Plants])
    expected = (
        (Herbivores.Cow, Plants.TG, 0.6),
        (Herbivores.Cow, Plants.SG, 0.67),
        (Herbivores.Horse, Plants.TG, 0.73),
        (Herbivores.Horse,Plants.SG, 0.74),
        (Herbivores.Sheep, Plants.TG, 0.87),
        (Herbivores.Sheep, Plants.SG, 0.89),
    )
    for herbivore, plant, value in expected:
        self.assertEqual(InteractionExt[herbivore, plant], value)
def andQuery(query, k, index, v):
    """Cluster the songs matching every query word; draw a cloud per cluster.

    Each song in the intersection of the query terms is turned into a
    unit-normalised vector, the vectors are clustered with k-means, and one
    word cloud is drawn from the lyrics of the songs in each cluster.

    Args:
        query: raw query text.
        k: number of clusters.
        index: inverted index, passed through to the helpers.
        v: vocabulary, passed through to the helpers.

    Returns:
        The cluster label of every matching song, in the order the songs
        were vectorised, or ``None`` after printing ``'NO MATCH!'`` when
        clustering or rendering fails.
    """
    # Freeze the iteration order so labels can be mapped back to songs.
    files = list(getIntersection(query, index, v))
    lyrics = [ind.getText(name) for name in files]

    cluster = []
    for name, song in zip(files, lyrics):
        tv = text_in_vector(song, index, v, name)
        norm = numpy.linalg.norm(tv)
        cluster.append([component / norm for component in tv])

    kmeans = KMeans(n_clusters=k, init='random')
    try:
        kmeans.fit(cluster)
        labels = kmeans.predict(cluster)
        for label in range(k):
            print("Cluster " + str(label))
            # Only the songs assigned to this cluster feed its word cloud.
            text = "".join(
                str(song)
                for song, assigned in zip(lyrics, labels)
                if assigned == label
            )
            wordCloud(text)
        return labels
    except Exception:
        # Best-effort, as before (e.g. fewer matches than clusters), but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print('NO MATCH!')
        return None
def create_query_vector(invertd_file, query):
    """Build the query's term vector over the inverted file's vocabulary.

    The query is tokenized, normalized and stemmed with the ``Index``
    helpers. Terms missing from ``invertd_file`` are ignored.

    Args:
        invertd_file: mapping of term to its entry. String-valued entries
            are skipped. For other entries the last element is the weight
            multiplied by the query term frequency (presumably an IDF
            value; confirm against the index builder).
        query: raw query text.

    Returns:
        A list with one component per non-string entry of ``invertd_file``,
        in the mapping's iteration order.
    """
    from collections import Counter

    query_terms = Index.stem(Index.normalize(Index.tokenize(query)))

    # Term frequency, restricted to terms already in the vocabulary.
    query_tf = Counter(term for term in query_terms if term in invertd_file)

    query_vector = []
    for key, values in invertd_file.items():
        if isinstance(values, str):
            continue
        if key in query_tf:
            query_vector.append(query_tf[key] * values[-1])
        else:
            query_vector.append(0)
    return query_vector
def insert(table_name: str, values: list):
    """Insert one record into ``table_name`` and print the elapsed time.

    The catalog checks (``not_exists_table``, ``check_types_of_table``) run
    first. The record is then written through the buffer, and the line
    number it returns is registered in the index.
    """
    # perf_counter is monotonic; time.time() can jump with clock changes.
    time_start = time.perf_counter()
    Catalog.not_exists_table(table_name)
    Catalog.check_types_of_table(table_name, values)
    linenum = Buffer.insert_record(table_name, values)
    Index.insert_into_table(table_name, values, linenum)
    time_end = time.perf_counter()
    print(" time elapsed : %fs." % (time_end - time_start))
def delete(table_name: str, where: list = None):
    """Delete the records of ``table_name`` matching ``where``; print timing.

    The buffer performs the deletion and returns the list that is then
    passed to ``Index.delete_from_table`` (named ``pklist``; presumably the
    primary keys of the removed rows).
    """
    # perf_counter is monotonic; time.time() can jump with clock changes.
    time_start = time.perf_counter()
    Catalog.not_exists_table(table_name)
    # Reuses the select-statement check to validate the where clause.
    Catalog.check_select_statement(table_name, ['*'], where)
    # Method borrowed from insert.
    col = Catalog.get_column_dic(table_name)
    pklist = Buffer.delete_record(table_name, col, where)
    Index.delete_from_table(table_name, pklist)
    time_end = time.perf_counter()
    print(" time elapsed : %fs." % (time_end - time_start))
def create_table(table_name: str, attributes: list, pk: str):
    """Create ``table_name`` in the index, catalog and buffer; print timing.

    ``Catalog.exists_table`` runs first (presumably rejecting a table that
    already exists; confirm against Catalog).
    """
    # perf_counter is monotonic; time.time() can jump with clock changes.
    time_start = time.perf_counter()
    Catalog.exists_table(table_name)
    Index.create_table(table_name)
    Catalog.create_table(table_name, attributes, pk)
    Buffer.create_table(table_name)
    time_end = time.perf_counter()
    print("Successfully create table '%s', time elapsed : %fs." %
          (table_name, time_end - time_start))
def drop_table(table_name: str):
    """Drop ``table_name`` from the catalog, buffer and index; print timing.

    ``Catalog.not_exists_table`` runs first (presumably rejecting an unknown
    table; confirm against Catalog).
    """
    # perf_counter is monotonic; time.time() can jump with clock changes.
    time_start = time.perf_counter()
    Catalog.not_exists_table(table_name)
    Catalog.drop_table(table_name)
    Buffer.drop_table(table_name)
    Index.delete_table(table_name)
    time_end = time.perf_counter()
    print("Successfully drop table '%s', time elapsed : %fs." %
          (table_name, time_end - time_start))
def add_paper_button_click(self):
    """Prompt for a Semantic Scholar Corpus ID and add that paper.

    Nothing happens when the dialog is cancelled or left empty. Otherwise
    the paper is added through ``Index`` and the view is re-sorted.
    """
    corpus_id, accepted = QInputDialog().getText(
        self, "Enter Corpus ID", "Enter semantic scholar Corpus ID:",
        QLineEdit.Normal, "")
    if not (accepted and corpus_id):
        return
    Index.add_paper_by_corpus_id(corpus_id)
    self.copy_sort_update()
def Compressed_Sparse_Row(Matrix):
    """Compute and print the Compressed Sparse Row form of ``Matrix``.

    Three lists are built and printed (positions are 1-based):

    * ANZ: the non-zero elements in row-major order.
    * JA: the column number of each non-zero element.
    * IA: one entry per row pointing at the row's first non-zero element
      in ANZ, followed by a final ``len(ANZ) + 1`` sentinel. An all-zero
      row repeats the entry of the row after it.
    """
    anz = []
    ja = []
    ia = []
    # Dictionary returned by Return_Sparse_Matrix; sorting it yields only
    # its keys (the coordinates of the non-zero elements) in sorted order.
    SparseM = SMatrix.Return_Sparse_Matrix(Matrix)
    sortedSparseM = sorted(SparseM)
    # A row of zeros with as many entries as the problem has variables.
    ZR = [0 for i in range(0, NumberOfVariables(Matrix))]
    # Indexes of the rows whose elements are all zero.
    ZRow = Index.getAllIndexes(Matrix, ZR)

    for row in Matrix:
        # Only the first non-zero element of a row contributes to IA.
        first_in_row = True
        for elmt in row:
            if elmt != 0:
                anz.append(elmt)
                if first_in_row:
                    ia.append(Index.getLastIndex(anz, elmt) + 1)
                    first_in_row = False

    # Take only the column index of each coordinate tuple and make it
    # 1-based for readability.
    for key in sortedSparseM:
        ja.append(key[1] + 1)

    # Append the sentinel BEFORE the copy-back step so a zero row with no
    # non-zero row after it (trailing zero row, all-zero matrix) copies the
    # sentinel instead of indexing past the end of IA.
    ia.append(len(anz) + 1)

    # Copy-back technique: a zero row repeats the pointer of the next row.
    if ZRow:
        for i in ZRow:
            ia.insert(i, ia[i])

    print("ANZ : {}\nJA : {}\nIA :{}".format(anz, ja, ia))
def text_in_vector(text_song, invIndex, vocab, json_name):
    """Map a song's lyrics onto the vocabulary as a TF-IDF vector.

    Exactly one component is emitted per vocabulary word, so the result is
    always aligned with vectors built over the same vocabulary (as needed
    for cosine similarity).

    Args:
        text_song: raw lyrics of the song.
        invIndex: inverted index keyed by ``str(vocab[word])``, holding
            ``(file, tf)`` postings.
        vocab: mapping from word to its id.
        json_name: name of the song's file, used to pick its posting.

    Returns:
        A list with ``tf * idf`` for words present in the lyrics and posted
        for ``json_name``, and ``0`` for every other word.
    """
    text = ind.wordNorm(text_song)
    word_vector = []
    for v in vocab:
        weight = 0
        if v in text:
            for (file, tf) in invIndex[str(vocab[v])]:
                if file == json_name:
                    weight = tf * ind.idf(invIndex, vocab[v])
                    break
        word_vector.append(weight)
    return word_vector
def read_hdfs_index(self, list_raw_query):
    """
    (1) Search the index for the HDFS paths matching the query.
    (2) If (1) returned paths, open them through the HDFS connector.
    :return: the result of ``open_hdfs``, or None when the search gave None
    """
    list_hdfs_paths = Index().search_index(list_raw_query)
    if list_hdfs_paths is None:
        return None
    return HdfsConnector().open_hdfs(list_hdfs_paths)
def unionQuery(query, invIndex, vocab):
    """Return the 10 best ``[json_name, cosine]`` pairs for the query.

    Every song posted under any query word is scored by the cosine
    similarity between that word's query vector and the song's vector.
    A song matching several words appears once per matching word.

    Returns:
        Up to 10 ``[json_name, cosine]`` lists, highest cosine first.
    """
    cosine = []
    for x in ind.wordNorm(query):
        searched = toSearch(x, invIndex, vocab)
        vector_query = makeQuery(x, vocab)
        for (file, tf) in searched:
            song = ind.getText(file)
            tv = text_in_vector(song, invIndex, vocab, file)
            cosine.append([file, get_cosine(vector_query, tv)])
    # Previously a throwaway list was heapified and the unsorted first ten
    # pairs were returned; select the ten largest scores instead.
    return heapq.nlargest(10, cosine, key=lambda pair: pair[1])
def cost(weight):
    """Sum the squared estimation errors over the ten train/test splits.

    For split ``z`` an index is created from ``training<z>.csv`` and
    ``index<z>.csv``, and the movies of ``testing<z>.csv`` are loaded with
    it. Each movie's first five values are fed to ``estimate``; a non-zero
    estimate adds its squared difference from the sixth value.
    """
    total = 0
    for fold in range(10):
        suffix = str(fold) + '.csv'
        index = Index.create_index(weight, 'training' + suffix,
                                   'index' + suffix)
        movie = Index.create_movie('testing' + suffix, index)
        for record in movie.values():
            predicted = estimate(record[0:5], weight)
            if predicted != 0:
                error = predicted - record[5]
                total += error * error
    return total
def reindex_button_click(self):
    """Prompt for a Corpus ID and re-index the selected paper with it.

    Nothing happens when the dialog is cancelled or left empty. Otherwise
    the selected paper's file is re-indexed, the info panel refreshed, the
    index JSON saved and the view re-sorted.
    """
    corpus_id, accepted = QInputDialog().getText(
        self, "Enter Corpus ID", "Enter semantic scholar Corpus ID:",
        QLineEdit.Normal, "")
    if not (accepted and corpus_id):
        return
    selected = self.PapersView[self.selected_paper_index]
    Index.reindex_file_by_corpus_id(selected['path'], corpus_id)
    self.update_selected_paper_info()
    Index.save_json(Index.gJSONfilename)
    self.copy_sort_update()
def test7(self):
    """ test reading some values from disk (1dim) """
    pcraster.setclone("clone.map")
    Plants = Index.Index(["TG", "SG"])

    QMax = vcMod.VariableCollection(
        [Plants],
        value=vcMod.ValueFromParameterTable("QMax", "parameterFile.tbl",
                                            pcraster.Scalar))
    self.assertEqual(QMax[Plants.TG], 12000)
    self.assertEqual(QMax[Plants.SG], 18000)

    Cvr = vcMod.VariableCollection(
        [Plants],
        value=vcMod.ValueFromParameterTable("Cvr", "parameterFile.tbl",
                                            pcraster.Scalar))
    # assertIsInstance replaces the assert_ alias, which is deprecated and
    # removed in Python 3.12; it also reports the actual type on failure.
    self.assertIsInstance(Cvr[Plants.TG], pcraster._pcraster.Field)
    self.assertIsInstance(Cvr[Plants.SG], pcraster._pcraster.Field)

    kv = vcMod.VariableCollection(
        [Plants],
        value=vcMod.ValueFromParameterTable("kv", "parameterFile.tbl",
                                            pcraster.Nominal))
    self.assertEqual(kv[Plants.TG], 3)
    self.assertEqual(kv[Plants.SG], 7)
def __init__(self):
    """Build the two-row window: a directory prompt above a word prompt."""
    self._invIndex = Index.InvertedIndex()
    Frame.__init__(self)
    self.pack(fill=BOTH)

    window = self.master
    window.title("Explorador de archivos (.txt)")
    window.geometry("325x100")

    # Row 1: directory label and entry; <Return> triggers add_Directory.
    self.frame1 = Frame(self)
    self.frame1.pack(pady=5)
    self.infoDirectory = Label(self.frame1, text="Ingrese un directorio...")
    self.infoDirectory.pack(side=LEFT, padx=5)
    self.inputDirectory = Entry(self.frame1, name="inputDirectory")
    self.inputDirectory.bind("<Return>", self.add_Directory)
    self.inputDirectory.pack(side=LEFT, padx=5)

    # Row 2: word label and entry; <Return> triggers search_Word.
    self.frame2 = Frame(self)
    self.frame2.pack(pady=5)
    self.infoWord = Label(self.frame2, text="Ingrese una palabra")
    self.infoWord.pack(side=LEFT, padx=5)
    self.inputWord = Entry(self.frame2, name="inputWord")
    self.inputWord.bind("<Return>", self.search_Word)
    self.inputWord.pack(side=LEFT, padx=5)
def from_node(self, node):
    """Populate fields, indexes and attributes from the XML ``node``.

    Raises:
        AssertionError: if a ``fields/field`` element has no name.
        NotImplementedError: if any ``constraints/constraint`` element exists.
    """
    for field_node in node.xpathEval('fields/field'):
        field_name = field_node.prop('name')
        assert field_name
        self.fields[field_name] = Field.new(self, field_name, field_node)

    for _unsupported in node.xpathEval('constraints/constraint'):
        raise NotImplementedError('you used a constraint! please implement them...')

    # Unnamed indexes get generated names: index_0001, index_0002, ...
    unnamed_count = 0
    for index_node in node.xpathEval('indexes/index'):
        index_name = index_node.prop('name')
        if not index_name:
            unnamed_count += 1
            index_name = 'index_%04x' % unnamed_count
        self.indexes[index_name] = Index.new(self, index_name, index_node)

    # the default for these are handled by Modeling itself
    self.className = node.prop('class_name')
    self.moduleName = node.prop('module_name')
    self.externalName = node.prop('external_name')

    # of no use (yet)
    # A missing attribute leaves the falsy prop value in place rather than
    # coercing it to False.
    abstract_text = node.prop('is_abstract')
    self.isAbstract = abstract_text and abstract_text.lower() == 'true'
    read_only_text = node.prop('is_read_only')
    self.isReadOnly = read_only_text and read_only_text.lower() == 'true'
def test7(self):
    """External names parse the same with or without spaces around '='."""
    pcraster.setclone("clone.map")
    spellings = (
        ["TG=TallGrass", "SG=ShortGrass"],
        ["TG = TallGrass", "SG = ShortGrass"],
    )
    for spelling in spellings:
        PlantSpecies = Index.Index(spelling)
        attributes = PlantSpecies.__dict__
        self.assertEqual(attributes["_values"], ['TG', 'SG'])
        self.assertEqual('TallGrass', attributes["_externalNames"].get("TG"))
        self.assertEqual('ShortGrass', attributes["_externalNames"].get("SG"))
async def manejar_archivo(archivo, writer):
    """Write the response for the requested path ``archivo`` to ``writer``.

    ``'/'`` is answered with an index page generated from the document
    root. Any other path is served from disk in chunks of ``args['size']``
    bytes. A missing file is answered with ``404error.html`` and the
    ``"NOT"`` status code.

    Raises:
        DirectoryError: if the requested path is a directory.
    """
    if archivo == '/':
        index_generado = bytearray(Index.generar(args['documentroot']), 'utf-8')
        await encabezado("OK", "html", len(index_generado), writer)
        writer.write(index_generado)
        return

    # NOTE(review): the request path is concatenated onto the document root
    # unchecked, so '..' segments could escape it. Confirm the path is
    # sanitised before it reaches this function.
    archivo = args['documentroot'] + archivo
    try:
        if "favicon.ico" in archivo:
            archivo = "./web/favicon.ico"
        file = open(archivo, "rb")
        cod = "OK"
    except FileNotFoundError:
        archivo = args['documentroot'] + "/404error.html"
        file = open(archivo, "rb")
        cod = "NOT"
    except IsADirectoryError:
        raise DirectoryError("La direccion corresponde a un directorio")

    # The context manager closes the file even if sending fails part-way.
    with file:
        pathsize = pathlib.Path(archivo).stat().st_size
        await encabezado(cod, archivo.split(".")[-1], pathsize, writer)
        texto = file.read(args['size'])
        while texto:
            writer.write(texto)
            texto = file.read(args['size'])
def getIntersection(query, invIndex, vocab):
    """Return the names of the songs that contain every word of the query.

    Returns:
        A set of song names; empty when the normalised query has no words.
    """
    q = ind.wordNorm(query)
    if not q:
        # Previously q[0] raised IndexError on an empty query.
        return set()

    doc = None
    for x in q:
        names = {tup[0] for tup in toSearch(x, invIndex, vocab)}
        # The first word seeds the result; each further word narrows it.
        doc = names if doc is None else names & doc
    return doc
def delete_hdfs_index(self, list_hdfs_paths):
    """
    (1) Delete the files from HDFS first (currently disabled; this step is
        treated as successful).
    (2) If (1) is successful, delete the HDFS paths from the index.
    :return: True only when the index deletion returns True, else False
    """
    # HDFS deletion through HdfsConnector is switched off for now.
    flag_hdfs = True
    if flag_hdfs is not True:
        return False

    # Try to delete items from postgresql
    index = Index()
    if index.delete_items(self.dict_pg_info, list_hdfs_paths) is not True:
        return False

    print("Delete from index successful!")
    return True
def set_beta(self):
    """Set ``self.beta`` to cov(self, market) / var(market).

    The market is the index built from the ``'^GSPC'`` symbol.
    """
    SP = Index.Index('^GSPC')
    portfolio = Portfolio.Portfolio([self, SP])
    # The covariance is computed once; the earlier extra call discarded its
    # result. Row 0 / column 1 pairs this stock (first) with the index.
    cov_with_market = portfolio.get_covariance().iloc[0, 1]
    market_var = SP.get_variance()
    self.beta = cov_with_market / market_var
def test3(self):
    """Iterating a collection yields its index tuples in declaration order."""
    pcraster.setclone("clone.map")
    PlantSpecies = Index.Index(["Species3", "Species1", "Species2"])
    Coll = vcMod.VariableCollection([PlantSpecies], value=0)
    visited = "".join([str(plant) for plant in Coll])
    self.assertEqual(visited, "('Species3',)('Species1',)('Species2',)")
def makeQuery(input, vocab):
    """Map the query onto the vocabulary as a 0/1 vector.

    Returns a list with one entry per vocabulary word: 1 when the word is
    found in the normalised query, 0 otherwise.
    """
    wanted = ind.wordNorm(input)
    return [1 if term in wanted else 0 for term in vocab]
def toSearch(input, invertedInd, vocab):
    """Collect the postings of every word of the normalised input.

    Each word is translated to its id through ``vocab``, and the entries
    stored under ``str(id)`` in ``invertedInd`` are concatenated in word
    order.
    """
    postings = []
    for term in ind.wordNorm(input):
        postings.extend(invertedInd[str(vocab[term])])
    return postings
def __init__(self, base=None, url=None):
    """
    Choose the base directory: built from ``url``, taken from ``base``, or
    a random source from the index when neither yields one.

    :param None|Index.Dir base:
    :param None|str url:
    """
    if url:
        # url and base are mutually exclusive
        assert not base
        base = Index.Dir(url=url)
    self.base = base or Index.index.get_random_source()  # type: Index.Dir
def listfile(self):
    """Prompt for a peer number and print that peer's index entries.

    Each entry is printed as ``- <number> : <name> | size : <n> bytes``.
    Any failure is reported as ``[Error] <message>`` instead of
    propagating.
    """
    try:
        num = int(self._input('Peer Num : '))
        peer_id = self.vpn.get_peer_by_num(num)[0]
        for entry in Index(peer_id).list_index():
            sys.stdout.write("- %d : %s | size : %d bytes\n" % (entry[0], entry[1], entry[2]))
            sys.stdout.flush()
            sleep(0.0001)
    except Exception as e:
        # Function-call form works on Python 2 and 3 with the same output
        # as the old ``print "[Error]", e`` statement.
        print("[Error] %s" % (e,))
class BigramCounter:
    """Attach index counts for token pairs to text pieces."""

    def __init__(self, index_path):
        self.index = Index(index_path)
        # Third entry of the index statistics; copied onto every processed
        # piece as ``bigram_cf``.
        self.cf = self.index.index_stats()[2]

    def work(self, text_piece):
        """Store pair counts on ``text_piece``.

        Two kinds of query strings are counted through
        ``self.index.get_count``:

        * ``#1(a b)`` for every pair of adjacent tokens, and
        * ``#uw8(a b)`` for every unordered pair of distinct tokens.

        The results are stored as ``text_piece.bigram_counts`` along with
        ``text_piece.bigram_cf``. A piece with fewer than two tokens is
        left untouched.
        """
        from itertools import combinations

        tokens = text_piece.tokens
        if len(tokens) < 2:
            return

        counts = {}
        for first, second in zip(tokens, tokens[1:]):
            token_pair_string = '#1(%s)' % ' '.join((first, second))
            counts[token_pair_string] = self.index.get_count(token_pair_string)

        for token1, token2 in combinations(list(set(tokens)), 2):
            token_pair_string = '#uw8(%s %s)' % (token1, token2)
            counts[token_pair_string] = self.index.get_count(token_pair_string)

        text_piece.bigram_counts = counts
        text_piece.bigram_cf = self.cf
# Check the arguments.
if not os.path.exists(args.input):
    log.error("Does %s exist?" % args.input)
    # Exit non-zero so callers can tell the run failed; a bare sys.exit()
    # reports success.
    sys.exit(1)

if args.output is not None and not os.path.exists(args.output):
    os.mkdir(args.output)

if args.crawl is not None:
    ### CRAWLER
    crawler = Crawler(args.input, args.n, args.output, transforms, url_base)
    crawler.run()

if args.post is not None:
    log.info("Posting the data in: %s" % args.input)
    i = Index(solr)

    # Walk the input path; only folders named 'solr' hold documents to post.
    count = 0
    for (dirpath, dirnames, filenames) in os.walk(args.input):
        if os.path.basename(dirpath) == 'solr':
            count += 1
            log.info("Processing: %s: %s" % (count, dirpath))
            for f in filenames:
                solr_doc = os.path.join(dirpath, f)
                doc = etree.parse(solr_doc)
                i.submit(etree.tostring(doc), solr_doc)
    i.commit()
#!/usr/bin/python
# Test the db functionality.
from Index import *
import os

index_obj = Index()
data_dir = './dataset'
# index_obj.write_img_path_into_Image(data_dir)
data = index_obj.read_img_path_from_Image()

# Disabled: write the same sample feature for every image row, where each
# row f is (path, id), e.g. ('./dataset/xxx.png', 101000).
# for f in data:
#     feature = [0.20870997, 0.0012804293, 0.0, 0.01920644, 0.00076825754]
#     index_obj.write_all_features_into_Index(f[1], feature)

data = index_obj.read_all_features_from_Index()
# note: data is a tuple of strings ('10001', '[...]')
for fid, feature in data:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(feature)
    # note: feature is a string-formatted list => '[v1,v2,v3]'
    feature = [float(x) for x in feature.strip('[]').split(',')]
__author__ = r'lenovo'
#coding:utf-8
import Index

# Main section, used for testing.

# Exercise the person-info class.
ps1 = Index.person_info('liqiang', '', 'this is Other', 2, 9)
ps2 = Index.person_info('zhang1', '页')
lis = [ps1, ps2]
for e in lis:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(e)
print (str(e)) print 'Usage: %s --dataset path/to/dataset' % sys.argv[0] sys.exit(2) data_dir = '' for opt, val in optlist: if opt == '--dataset': data_dir = val print data_dir print # initialize feature descriptor # set bin = 16, H= feature_descriptor = FeatureDescriptor((8, 12, 3)) # write all images' path into db index_obj = Index() index_obj.write_img_path_into_Image(data_dir) # read in list of filenames for all jpg images in directory data_list = index_obj.read_img_path_from_Image() # extract feature for each image in dataset directory and store into db for f in data_list: img = cv2.imread(f[0]) # read img by dir ./dataset/xxx.png imgID = f[1] feature = feature_descriptor.describe(img) # write id, features to output index_obj.write_all_features_into_Index(imgID, feature)
def __init__(self, index_path):
    """Open the index at ``index_path`` and keep the third entry of its stats."""
    self.index = Index(index_path)
    stats = self.index.index_stats()
    self.cf = stats[2]