# NOTE(review): the line below is a whitespace-collapsed fragment of a longer
# script: its original line breaks and indentation are lost and the text is
# cut off at both ends, so it is left untouched. Visible pieces, in order:
#   1. Tail of a function whose header is not visible here: when "" is in
#      img_url_res_checked it calls .remove("") on it, then returns
#      img_url_res_checked.
#   2. output_to_file(results): opens "../poem_img_url_test.txt" in append
#      mode, writes the items of `results` and writes "\n". Whether the
#      newline follows every item or only the whole batch cannot be read off
#      the collapsed text -- TODO confirm against the original file.
#   3. The __main__ script: creates a Translator, reads
#      "../poem_with_keyword.txt" through formator.input_from_file and, per
#      line, calls poem_keyword_split, keyword_format and
#      translate(translator, cn_text, "zh-cn", "en"), builds two "unsplash"
#      URLs with generate_url (one from cn_text, one from en_text), passes
#      each through unsplash_get and parser, and concatenates the two parser
#      results into img_url. The remainder of the loop is not visible.
# The helpers named above are defined outside this view; presumably they
# fetch and parse Unsplash search pages -- verify before relying on that.
if "" in img_url_res_checked: img_url_res_checked.remove("") # print(img_url_res_checked, len(img_url_res_checked)) return img_url_res_checked def output_to_file(results): with open("../poem_img_url_test.txt", "a") as file_object: for item in results: file_object.write(item) file_object.write("\n") if __name__ == '__main__': translator = Translator() poem_file = formator.input_from_file("../poem_with_keyword.txt") count = 0 for line in poem_file: poem, keyword = poem_keyword_split(line) cn_text = keyword_format(keyword) en_text = translate(translator, cn_text, "zh-cn", "en") uns_url_cn = generate_url(cn_text, "unsplash") uns_url_en = generate_url(en_text, "unsplash") uns_cn_html = unsplash_get(uns_url_cn) uns_en_html = unsplash_get(uns_url_en) img_url_to_display_cn = parser(uns_cn_html) img_url_to_display_en = parser(uns_en_html) img_url = img_url_to_display_cn + img_url_to_display_en
# NOTE(review): the line below is a whitespace-collapsed fragment of a longer
# script: its original line breaks and indentation are lost and the text is
# cut off at both ends, so it is left untouched. Visible pieces, in order:
#   1. Tail of a function whose header is not visible here: it transposes
#      text_rank with zip(*text_rank), takes element 0 as text_rank_word and
#      element 1 as text_rank_score, and returns the 4-tuple
#      (tf_idf_word, tf_idf_score, text_rank_word, text_rank_score).
#   2. output_to_file(results): opens "poem_with_keyword.txt" in append mode,
#      writes the items of `results` and writes "\n". Whether the newline
#      follows every item or only the whole batch cannot be read off the
#      collapsed text -- TODO confirm against the original file.
#   3. The __main__ script: calls jieba.enable_parallel(4), reads
#      "poem_new.txt" through formator.input_from_file and, per poem, unpacks
#      the four values returned by calculate_scores, collects the words that
#      occur in both tf_idf_word and text_rank_word (repeat_words),
#      concatenates both word and score sequences, and keeps the words that
#      are not in repeat_words (final_words). For each of those it looks up
#      the score at the word's first index in `words`. The remainder of the
#      loop is not visible.
# NOTE(review): calculate_scores( ) is written with no arguments although the
# loop variable `poem` is never used in the visible code -- confirm whether
# the argument was lost.
text_rank_unzip_list = list(zip(*text_rank)) text_rank_word = text_rank_unzip_list[0] text_rank_score = text_rank_unzip_list[1] return tf_idf_word, tf_idf_score, text_rank_word, text_rank_score def output_to_file(results): with open("poem_with_keyword.txt", "a") as file_object: for item in results: file_object.write(item) file_object.write("\n") if __name__ == '__main__': jieba.enable_parallel(4) poem_file = formator.input_from_file("poem_new.txt") for poem in poem_file: tf_idf_word, tf_idf_score, text_rank_word, text_rank_score = calculate_scores( ) repeat_words = list( set.intersection(set(tf_idf_word), set(text_rank_word))) words = tf_idf_word + text_rank_word scores = tf_idf_score + text_rank_score final_words = list(set(words) - set(repeat_words)) final_lst = [] # not repeat words for final_word in final_words: index = words.index(final_word) score = scores[index]
from src import formator


def output_to_file(results):
    """Append one record to ``../ready_2_db_v33.txt``.

    Every item yielded by ``results`` is written in order and a single
    newline is written after the last one. The script below passes one
    already-joined string per call, so each call produces exactly one
    output line.

    Args:
        results: An iterable of strings (a plain ``str`` works as well).
    """
    # NOTE(review): layout reconstructed from a whitespace-collapsed source;
    # the newline is emitted once per call, not once per item.
    with open("../ready_2_db_v33.txt", "a", encoding="UTF-8") as out:
        out.writelines(results)
        out.write("\n")


if __name__ == '__main__':
    # Rewrite the first tab-separated column of every input record with a
    # quoted running number starting at 120570, echoing each number to
    # stdout and appending the updated record to the output file.
    next_id = 120570
    for raw in formator.input_from_file("../ready_2_db_v3.txt"):
        fields = str(raw).strip().split("\t")
        quoted_id = '"{}"'.format(next_id)
        print(quoted_id)
        fields[0] = quoted_id
        output_to_file("\t".join(fields))
        next_id += 1
from src import formator


def output_to_file(results):
    """Append one record to ``../ready_2_db_v3.txt``.

    Every item yielded by ``results`` is written in order and a single
    newline is written after the last one. The script below passes one
    already-joined string per call, so each call produces exactly one
    output line.

    Args:
        results: An iterable of strings (a plain ``str`` works as well).
    """
    # NOTE(review): layout reconstructed from a whitespace-collapsed source;
    # the newline is emitted once per call, not once per item.
    with open("../ready_2_db_v3.txt", "a", encoding="UTF-8") as out:
        out.writelines(results)
        out.write("\n")


if __name__ == '__main__':
    # Input lines are consumed in pairs. The first line of a pair supplies
    # its first two "@"-separated fields; the second line supplies its
    # second field. Each pair becomes one record of six double-quoted,
    # tab-separated columns:
    #   number, first[0], first[1], second[1], "", number
    # where number counts pairs starting at 1. A trailing unpaired line is
    # read but never written.
    pending = []
    source = formator.input_from_file("../poem_img_url_3.txt")
    for index, raw in enumerate(source):
        parts = str(raw).strip().split("@")
        pair_no = str(index // 2 + 1)

        if index % 2 == 0:
            # First line of the pair: remember it until its partner arrives.
            pending = [pair_no, parts[0], parts[1]]
            continue

        # Second line of the pair: complete the record and flush it.
        pending.extend([parts[1], "", pair_no])
        row = '"' + '"\t"'.join(pending) + '"'
        print(row)
        output_to_file(row)
        pending = []