def add_info(files_path, core, core_http):
    """Index a whitespace-delimited table file, one document per data row.

    The first line of the file is treated as a header: every
    whitespace-separated token on it is passed to ``add_field`` together
    with the schema URL, and is then used as the key for the matching
    column of each following row.  Every row document additionally carries
    ``path`` (absolute path of the file), ``content`` (the raw line decoded
    as UTF-8) and ``id`` (a fresh value from ``id_List.Create_id``).

    Args:
        files_path: Path of the file to index.
        core: Object exposing ``delete(...)`` and ``add(list_of_dicts)``
            (presumably a ``pysolr.Solr`` instance -- confirm with callers).
        core_http: Passed through unchanged to ``id_List.Create_list``.
    """
    abs_path = os.path.abspath(files_path)

    # NOTE(review): the path is passed positionally; if ``core`` is a
    # pysolr.Solr object this is a delete-by-id, not a delete-by-path query.
    # Confirm that this is what is intended.
    core.delete(abs_path)
    id_List.Delete_list_path(files_path)

    # Decode only byte strings so a path that is already text is kept as is
    # instead of going through an implicit ASCII round-trip.
    if isinstance(abs_path, bytes):
        path_text = abs_path.decode('utf-8')
    else:
        path_text = abs_path

    url = 'http://localhost:8983/solr/info/schema'
    dicts = []
    with open(files_path, 'rb') as f:
        header = f.readline().strip().split()
        for name in header:
            add_field(url, name)
        fields = [name.decode('utf-8') for name in header]

        for line in f:
            words = [s.decode('utf-8') for s in line.strip().split()]
            id_num = id_List.Create_id()

            # Map the data columns first, then set the metadata keys
            # explicitly.  Zipping them positionally with the columns would
            # shift path/content/id onto the wrong keys whenever a row has
            # fewer or more columns than the header.
            doc = dict(zip(fields, words))
            doc[u'path'] = path_text
            doc[u'content'] = line.decode('utf-8')
            doc[u'id'] = id_num
            dicts.append(doc)

            id_List.Create_list(files_path, id_num, core_http)

    core.add(dicts)
def add_desc(files_path, core, core_http):
    """Index the whole content of a file as a single document.

    The file is read in binary mode and decoded as UTF-8.  One document with
    the keys ``id``, ``path`` and ``context`` is handed to ``core.add``, and
    the id is then passed to ``id_List.Create_list``.

    Args:
        files_path: Path of the file to index.
        core: Object exposing ``add(list_of_dicts)``.
        core_http: Passed through unchanged to ``id_List.Create_list``.
    """
    id_List.Delete_list_path(files_path)

    with open(files_path, 'rb') as handle:
        raw = handle.read()
    body = raw.decode('utf-8')

    doc_id = id_List.Create_desc_id(files_path)
    # NOTE(review): the key is 'context' here while add_info uses 'content';
    # confirm against the target core's schema.
    document = {
        'id': doc_id,
        'path': os.path.abspath(files_path),
        'context': body,
    }
    core.add([document])
    id_List.Create_list(files_path, doc_id, core_http)
def add_txt(files_path, core, core_http):
    """Index a whitespace-delimited transcript file, one document per line.

    The layout is decided from the first line only: four tokens mean
    ``wav start end text`` and two tokens mean ``wav text``.  Any other
    token count prints an error message and returns without adding anything
    (the deletions at the top of the function have already run by then).
    Every document also carries ``path`` (absolute path of the file) and
    ``id`` (a fresh value from ``id_List.Create_id``).

    Args:
        files_path: Path of the file to index.
        core: Object exposing ``delete(...)`` and ``add(list_of_dicts)``
            (presumably a ``pysolr.Solr`` instance -- confirm with callers).
        core_http: Passed through unchanged to ``id_List.Create_list``.
    """
    abs_path = os.path.abspath(files_path)

    # NOTE(review): the path is passed positionally; if ``core`` is a
    # pysolr.Solr object this is a delete-by-id, not a delete-by-path query.
    core.delete(abs_path)
    id_List.Delete_list_path(files_path)

    dicts = []
    fields = ()
    with open(files_path, 'rb') as f:
        for i, line in enumerate(f):
            words = [s.decode('utf-8') for s in line.strip().split()]
            if i == 0:
                if len(words) == 4:
                    fields = ('wav', 'start', 'end', 'text')
                elif len(words) == 2:
                    fields = ('wav', 'text')
                else:
                    # Single-argument parenthesized form prints the same
                    # text whether print is a statement or a function.
                    print('ERROR! ' + files_path + ' is not the correct format!')
                    return

            id_num = id_List.Create_id()

            # Set path/id explicitly instead of zipping them positionally
            # after the tokens: a later line with a different token count
            # than the first would otherwise store them under the wrong keys.
            doc = dict(zip(fields, words))
            doc['path'] = abs_path
            doc['id'] = id_num
            dicts.append(doc)

            id_List.Create_list(files_path, id_num, core_http)

    core.add(dicts)
def Delete_info(file_path):
    """Delete every document of the ``info`` core whose ``path`` matches.

    Connects to the Solr core at ``http://localhost:8983/solr/info``, issues
    a delete-by-query on the ``path`` field, and then calls
    ``id_List.Delete_list_path`` with the same path.

    Args:
        file_path: Path value to match.  It is used as given (it is not
            converted to an absolute path here).
    """
    core = pysolr.Solr('http://localhost:8983/solr/info')

    # Inside a quoted phrase the query parser treats backslash as an escape
    # character and a double quote as the terminator, so both must be
    # escaped or paths containing them (e.g. Windows paths) will not match
    # or will corrupt the query.
    escaped = file_path.replace('\\', '\\\\').replace('"', '\\"')
    core.delete(q='path:"' + escaped + '"')

    id_List.Delete_list_path(file_path)