# -*- coding: utf-8 -*-
import cx_Oracle

def get_file_content():
    # G_file_no_dict, G_path_prefix and the helpers convert(), get_connection(),
    # get_content_from_lob() and close_connection() are defined at module level.
    if len(G_file_no_dict) == 0:
        print convert("索引号列表为空")  # "the index-number list is empty"
        return 0
    conn = get_connection()
    if conn != 0:
        cursor = conn.cursor()
        # The result file name must be cp936 (GBK) encoded for the local file system.
        result_file_name = (G_path_prefix + r"result.txt").decode('utf-8').encode('cp936')
        result_file = open(result_file_name, "a+")
        for file_uniq_no in G_file_no_dict.keys():
            # file_uniq_no is interpolated directly into the statement.
            sql = ("select d.mr_content from jhmr_file_content_TEXT d "
                   "where d.file_unique_id = '%s'" % file_uniq_no)
            try:
                cursor.execute(sql)
                content_list = get_content_from_lob(cursor)
            except cx_Oracle.DatabaseError:
                print convert("获取病历内容失败")  # "failed to fetch the medical record content"
                return 0
            for item in content_list:
                print G_file_no_dict[file_uniq_no]
                result_file.write(G_file_no_dict[file_uniq_no])
                result_file.write("\r\n")
                # LOB content comes back as cp936; store it as UTF-8 in the result file.
                item = item.decode('cp936').encode('utf-8')
                result_file.write(item)
                result_file.write("\r\n")
        close_connection(conn)
        result_file.close()
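# The function above relies on a get_content_from_lob() helper that is not shown
# here. A minimal sketch of what it might look like, assuming the query returns a
# single CLOB column per row and the cx_Oracle LOB object is read with .read()
# (the body below is an assumption inferred from the call site, not the original):
def get_content_from_lob(cursor):
    content_list = []
    for row in cursor:
        lob = row[0]
        # CLOB columns arrive as cx_Oracle LOB objects; read() returns the full text.
        content_list.append(lob.read() if lob is not None else "")
    return content_list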
import re
import time

def loadAnagrams():
    # anagramsFile and the anagrams dict are module-level; the file is the one
    # written by precomputeFromNLTK(), one "<sorted letters>:set([...])" line per key.
    print("Loading Anagrams...")
    stime = time.time()
    anagram_file = open(anagramsFile, "r")
    for line in anagram_file:
        # Splitting on non-alphanumerics leaves the key at index 0, the literal
        # "set" at index 1 and the words (plus empty fragments) afterwards.
        words = re.split("[^a-zA-Z0-9]", line)
        key = words[0]
        entries = set()
        for i in xrange(2, len(words)):
            entries.add(words[i])
        anagrams[key] = filter(None, entries)  # drop the empty fragments
    anagram_file.close()
    print("Anagrams loaded: %d" % len(anagrams))
    print("Time taken: %s" % (time.time() - stime))
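# Once loadAnagrams() has filled the global dict, a lookup helper could be as
# small as the sketch below (assumption: the keys written by precomputeFromNLTK
# are a word's letters in sorted order; findAnagrams is not part of the original):
def findAnagrams(word):
    key = "".join(sorted(word.lower()))
    return anagrams.get(key, [])

# e.g. findAnagrams("stop") returns whatever corpus words share the key "opst".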
import os
import StringIO
import tempfile
import zipfile

from django.http import HttpResponse

def export_ramp_color_library(library, library_rules):
    # Dump every color ramp of the library into .rmf files inside a temporary
    # directory, zip that directory in memory and return it as a download.
    dir_path = tempfile.mkdtemp(suffix='', prefix='tmp-library-')
    for library_rule in library_rules:
        resource_path = os.path.join(dir_path, library_rule)
        os.makedirs(resource_path)
        for i, color_ramp in enumerate(library_rules[library_rule]):
            ramp_file = open(
                os.path.join(resource_path,
                             "color_ramp-" + color_ramp.name + "-" + str(i) + ".rmf"),
                'w+')
            ramp_file.write(
                build_library_color_ramp(color_ramp.name, color_ramp.definition))
            ramp_file.close()

    buffer = StringIO.StringIO()
    z = zipfile.ZipFile(buffer, "w")
    relroot = dir_path
    for root, dirs, files in os.walk(dir_path):
        rel_path = os.path.relpath(root, relroot)
        if rel_path != ".":
            z.write(root, rel_path)  # keep directory entries in the archive
        for name in files:
            filename = os.path.join(root, name)
            if os.path.isfile(filename):
                z.write(filename, os.path.join(rel_path, name))
    z.close()
    buffer.seek(0)

    response = HttpResponse(content_type='application/zip; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=' + library.name + '.zip'
    response.write(buffer.read())
    utils.__delete_temporaries(dir_path)
    return response
from nltk.corpus import words

def precomputeFromNLTK():
    """Precompute anagram lists using NLTK's words corpus as the wordbase."""
    # nltkHashMap and nltkAnagramsFile are module-level; each entry maps the
    # sorted letters of a word to the set of corpus words with those letters.
    language = set()
    print(len(words.words()))
    for word in words.words():
        word = word.lower()
        sortW = "".join(char for char in sorted(word))
        if sortW[0] >= "a" and sortW[0] <= "z":
            language.add(word + ":" + sortW)
    print("Loaded %d words from the NLTK words corpus" % (len(language)))
    # Bucket the "word:sorted" entries by original word length (assumes words
    # shorter than 25 letters); only same-length words can be anagrams.
    buckets = [set() for x in xrange(25)]
    for word in language:
        buckets[len(word) / 2].add(word)
    count = 0
    for word in language:
        if count % 1000 == 0:
            print("Done for %d words" % count)
        count += 1
        sortedW = word.split(":")[1]
        if sortedW not in nltkHashMap:
            nltkHashMap[sortedW] = set()
            for word2 in buckets[len(sortedW)]:
                if sortedW == word2.split(":")[1]:
                    nltkHashMap[sortedW].add(word2.split(":")[0])
    out_file = open(nltkAnagramsFile, "w")
    out_file.truncate()
    count = 0
    for anagrams, listOfAnagrams in nltkHashMap.items():
        if count % 1000 == 0:
            print("%d anagram lists written" % count)
            out_file.flush()
        count += 1
        out_file.write("%s:%s\n" % (anagrams, listOfAnagrams))
    out_file.close()
    print("Precomputation with NLTK done")
def close(self):
    # Close via the Python 2 built-in file base class, then drop this instance
    # from the class-level registry of open files.
    file.close(self)
    self.__class__.open_files.remove(self)
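# close() above only makes sense as a method of a Python 2 file subclass that
# keeps a registry of its open instances; a minimal sketch of such a class
# (the class name and the open_files bookkeeping are assumptions, not the original):
class TrackedFile(file):
    open_files = []

    def __init__(self, *args, **kwargs):
        file.__init__(self, *args, **kwargs)
        self.__class__.open_files.append(self)

    def close(self):
        file.close(self)
        self.__class__.open_files.remove(self)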
# log_path, yesterday_date, today_date and tmp_path are defined earlier in the script.
yesterday_emails = list()

# gather yesterday's emails from the log
log_file = open(log_path, 'r')
for line in log_file.readlines():
    date = line.split(" ")[0].strip()
    if date == yesterday_date.strip():
        yesterday_emails.append(line)
    if date == today_date.strip():
        break
log_file.close()

# write yesterday's emails to a temporary file
yesterday_emails_path = tmp_path + '_yesterday_mails'
out_file = open(yesterday_emails_path, 'w')
for line in yesterday_emails:
    out_file.write(line + "\n")
out_file.close()

# now it's time to find the spammers: count how many logged messages share the
# same cwd= entry (the directory the sending script ran from), highest counts
# first.  Note the command hard-codes /tmp/_yesterday_mails, i.e. it assumes
# tmp_path is '/tmp/'.
possible_spammers_file_path = tmp_path + '_spammers'
cmd = "awk '$3 ~ /^cwd/{print $3}' /tmp/_yesterday_mails | sort | uniq -c | sed \"s|^ *||g\" | sort -nr"
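# One way the script could continue: run the shell pipeline and store the
# "count  cwd-directory" lines in the spammers file (the use of subprocess and
# this continuation are assumptions; only cmd and the paths come from the
# snippet above):
import subprocess

output = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).communicate()[0]
spammers_file = open(possible_spammers_file_path, 'w')
spammers_file.write(output)
spammers_file.close()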
def spider_closed(self, spider):
    # Scrapy signal handler: finish the export and close this spider's output file.
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()