def write_to_file(output_file, prices):
    """Append price records to a CSV file.

    A header row of the key names (sorted) is written only when the
    file does not already exist; each data row lists the values in
    key-sorted order so columns always line up with the header.

    Args:
        output_file: path of the CSV file to append to.
        prices: sequence of dicts that all share the same keys.
            An empty sequence is a no-op.
    """
    if not prices:
        # Nothing to write; also avoids IndexError on prices[0] below.
        return
    file_exists = os.path.isfile(output_file)
    with open(output_file, "a") as f:
        if not file_exists:
            f.write(",".join(sorted(prices[0].keys())) + "\n")
        for p in prices:
            # Sort items by key so the values match the header order.
            row = [str(value) for _key, value in sorted(p.items())]
            f.write(",".join(row) + "\n")
def test_pipe():
    """Demo of the `pipe` library: a Fibonacci sum and a word-frequency count.

    NOTE(review): relies on `pipe` operators (where, take_while, add,
    groupby, select, sort, count, Pipe) and on `findall` (presumably
    re.findall) being imported elsewhere in this file — confirm.
    """
    def fib():
        # Infinite Fibonacci generator: 0, 1, 1, 2, 3, 5, ...
        a, b = 0, 1
        while True:
            yield a
            a, b = b, a + b

    # Sum of the even Fibonacci numbers below 4,000,000
    # (the classic Project Euler problem 2).
    amount = fib() | where(lambda x: x % 2 == 0) | take_while(
        lambda x: x < 4000000) | add()
    print(amount)

    # Read a file, count how many times each word occurs, then sort the
    # words by occurrence count in descending order.
    with open('argparse.py') as f:
        fs = f.read()
        print(findall('\w+', fs))
        # groupby requires equal words to be adjacent only when keys repeat
        # non-contiguously — here every identical word shares one group key.
        print(fs | Pipe(lambda x: findall('\w+', x))
              # | Pipe(lambda x: (i for i in x if i.strip()))
              | groupby(lambda x: x)
              | select(lambda x: (x[0], (x[1] | count)))
              | sort(key=lambda x: x[1], reverse=True))
# Load single-word tags (one per line) and use each tag as its own
# labelled example, then tag every unlabelled word by embedding similarity.
# NOTE(review): Python 2 code — str lines are decoded from UTF-8 and
# `except Exception, ex` is py2-only syntax.
if os.path.exists(tag_list_path):
    with open(tag_list_path) as tag_list_file:
        logging.info("Processing " + tag_list_path)
        for key in tag_list_file:
            key = key.strip().decode("utf-8")
            # Only keep tags that exist in the model vocabulary,
            # otherwise n_similarity below would raise.
            if key in vocabs:
                ktag_to_utags[key].add(key)
if len(ktag_to_utags) == 0:
    # No labelled data found at all — cannot proceed.
    raise BaseException("Cannot find tag.csv or labelled_word.csv")
with open(unlabelled_word_path) as unlabelled_word_file:
    with open(result_path, "w") as result_file:
        for word in [
                line.strip().decode("utf-8")
                for line in unlabelled_word_file
        ]:
            try:
                # Score every known tag by similarity between the word and
                # that tag's labelled word set, most similar first
                # (uses the `pipe` library's sort/as_list operators).
                sim_tags_info = [
                    (tag, model.n_similarity([word], ktag_to_utags[tag]))
                    for tag in ktag_to_utags.keys()
                ] | sort(key=lambda tag_info: tag_info[1],
                         reverse=True) | as_list
                # Keep only the top 10 tags, formatted as "tag,score".
                sim_tags_info = [
                    t[0] + "," + str(t[1]) for t in sim_tags_info[0:10]
                ]
                output_line = "\t".join([word] + sim_tags_info)
                result_file.write(output_line.encode("utf-8") + "\n")
            except Exception, ex:
                # Best-effort: a word missing from the model vocabulary is
                # logged and skipped rather than aborting the whole run.
                logging.exception(ex)
                logging.warning("Cannot tag the word " + word)
def upload_time(self):
    """Return the most recent upload time across all versions.

    Equivalent to sorting every version's ``upload_time`` in descending
    order and taking the first element, but computed in O(n) with the
    builtin ``max`` instead of an O(n log n) full sort through the
    ``pipe`` library.

    Returns:
        The maximum ``upload_time`` value among ``self._versions``.

    Raises:
        ValueError: if ``self._versions`` is empty (the original
            sort-then-first pipeline would also fail on an empty
            sequence, though with a different exception type).
    """
    return max(version.upload_time for version in self._versions)
# NOTE(review): the first statements below use `line`, which is bound by
# an enclosing loop (presumably `for line in <labelled_word file>:`)
# whose header lies outside this chunk — confirm against the full file.
# Python 2 code: str lines are decoded from UTF-8 and
# `except Exception, ex` is py2-only syntax.
# Each labelled line is "unlabelled_tag<TAB>known_tag"; only words present
# in the model vocabulary are kept.
line = line.strip().decode("utf-8")
if len(line.split("\t")) == 2:
    utag = line.split("\t")[0]
    ktag = line.split("\t")[1]
    if utag in vocabs:
        ktag_to_utags[ktag].add(utag)
# Fall back to the plain tag list: each in-vocabulary tag becomes its
# own labelled example.
if os.path.exists(tag_list_path):
    with open(tag_list_path) as tag_list_file:
        logging.info("Processing " + tag_list_path)
        for key in tag_list_file:
            key = key.strip().decode("utf-8")
            if key in vocabs:
                ktag_to_utags[key].add(key)
if len(ktag_to_utags) == 0:
    # No labelled data from either source — cannot proceed.
    raise BaseException("Cannot find tag.csv or labelled_word.csv")
with open(unlabelled_word_path) as unlabelled_word_file:
    with open(result_path, "w") as result_file:
        for word in [line.strip().decode("utf-8")
                     for line in unlabelled_word_file]:
            try:
                # Rank every known tag by embedding similarity between the
                # word and the tag's labelled word set, most similar first
                # (uses the `pipe` library's sort/as_list operators).
                sim_tags_info = [(tag, model.n_similarity([word], ktag_to_utags[tag]))
                                 for tag in ktag_to_utags.keys()] | sort(
                    key=lambda tag_info: tag_info[1], reverse=True) | as_list
                # Keep the top 10 tags, each formatted as "tag,score".
                sim_tags_info = [t[0] + "," + str(t[1])
                                 for t in sim_tags_info[0:10]]
                output_line = "\t".join([word] + sim_tags_info)
                result_file.write(output_line.encode("utf-8") + "\n")
            except Exception, ex:
                # Best-effort: log and skip words the model cannot score
                # rather than aborting the whole run.
                logging.exception(ex)
                logging.warning("Cannot tag the word " + word)