def home():
    """Render the home page for supported locales; 404 for anything else."""
    if g.current_lang not in LOCALES:
        return abort(404)
    return render_template('home.html',
                           datamining=read_json_data('datamining.json'),
                           news=read_json_data('news.json'))
class TestEmp(unittest.TestCase):
    """Employee API tests.

    NOTE(review): test_01 stores the login token into app.TOKEN which
    test_02 reads — the tests are order-dependent by design (01/02 prefixes).
    """

    @classmethod
    def setUpClass(cls):
        # One shared HTTP session and API helper for the whole class.
        cls.session = requests.session()
        cls.login_url = app.PROJECT_URL + "/api/sys/login"
        cls.get_emp_url = app.PROJECT_URL + "/api/sys/user"
        cls.request_api = RequestApi()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_emp_data.json",
                       method_name="test_login"))
    def test_01_login(self, case_name, request_body, success, code, message,
                      http_code):
        '''
        Login test.
        :return:
        '''
        print("*" * 30, "测试" + case_name, "*" * 30)
        header = {"Content-type": "application/json;charset=utf-8"}
        response = self.request_api.doPost(self.session, self.login_url,
                                           request_body, headers=header)
        # Cache the bearer token for the subsequent user-listing test.
        app.TOKEN = "Bearer " + response.json().get("data")
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_emp_data.json",
                       method_name="test_emp"))
    def test_02_get_user(self, case_name, params, success, code, message,
                         http_code):
        '''
        Fetch all users.
        :param case_name: display name of the parameterized case
        :param params: query parameters of the request
        :param success: expected success flag (not asserted here)
        :param code: expected business response code
        :param message: expected substring of the response message
        :param http_code: expected HTTP status (not asserted here)
        :return:
        '''
        print("*" * 30, "测试" + case_name, "*" * 30)
        headers = {"Authorization": app.TOKEN}
        response = self.request_api.doGet(self.session, self.get_emp_url,
                                          params=params, headers=headers)
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))
def post(self):
    """Update config entries from parsed args of the form 'parameter:value'."""
    config_data = utils.read_json_data(self.config_file)
    if not config_data:
        abort(404, message="No valid config file found.")
    args_dict = dict(config_parser.parse_args())
    for key, raw_value in args_dict.items():
        if not raw_value:
            continue
        if key not in config_data:
            abort(404, message="Category {} is not valid.".format(key))
        pair = raw_value.split(':')
        if len(pair) != 2:
            abort(
                404,
                message=
                "No valid config value provided. Format is str(parameter:value)."
            )
        # Store value under its parameter name within the category.
        config_data[key][pair[0]] = pair[1]
    utils.write_json(config_data, self.config_file)
    return config_data, 200
def _to_translate(dataset, fold=1):
    """Dump QNLI questions/sentences into raw-text + id files for translation.

    Splits each JSON file into `fold` roughly equal chunks and writes one
    (raw text, ids) file pair per chunk.

    :param dataset: dataset name used in the input/output file paths.
    :param fold: number of chunks to split each file into.
    """
    json_files = ["questions", "sentences"]
    for json_file in json_files:
        json_path = "glue_data/qnli/{}_{}.json".format(dataset, json_file)
        json_samples = read_json_data(json_path)
        n = len(json_samples)
        split_num = n // fold
        if split_num == 0:
            # Empty input (or fold > n): nothing to write, and range() would
            # raise ValueError on a zero step.
            continue
        for i in range(0, n, split_num):
            split_above = min(i + split_num, n)
            raw_text_path = "glue_data/qnli/en/{}_{}_raw_{}_{}.txt".format(
                dataset, json_file, i, split_above)
            ids_path = "glue_data/qnli/en/{}_{}_ids_{}_{}.txt".format(
                dataset, json_file, i, split_above)
            # Context managers guarantee the files are closed even on error
            # (originals were opened/closed manually with no try/finally).
            with open(raw_text_path, "w", encoding="utf-8") as raw_f, \
                    open(ids_path, "w", encoding="utf-8") as ids_f:
                for j in range(i, split_above):
                    # Samples are keyed by stringified integer index.
                    raw_f.write("{}\n".format(json_samples[str(j)]))
                    ids_f.write("{}\n".format(j))
def do_notes(in_dir):
    """
    Read tracks info inside the in_dir folder and write tracks info to the
    tracks.json file.

    :param in_dir: Input dir
    :return: None
    """
    # Create or read tracks json.
    out_file = f"{in_dir}/tracks.json"
    if os.path.exists(out_file):
        out_json = read_json_data(out_file)
    else:
        out_json = {"instruments": {}}

    # We update this object in place.
    instruments_json = out_json["instruments"]

    # Get instruments info and update tracks json.
    for path in glob.glob(f"{in_dir}/scores/*.mid"):
        # os.path.basename/splitext work with both "/" and "\\" separators;
        # the previous split("\\")[-1] only worked for Windows-style paths.
        instrument_name = os.path.splitext(os.path.basename(path))[0]
        instrument_info = get_instrument_info(path)
        instruments_json[instrument_name] = instrument_info

    # Write tracks json.
    write_json_data(out_file, out_json)
    print(f"Write file {out_file}")
class TestLogin(unittest.TestCase):
    """Data-driven login API tests; one fresh HTTP session per test case."""

    @classmethod
    def setUpClass(cls):
        cls.login_url = app.PROJECT_URL + "/api/sys/login"
        cls.request_api = RequestApi()
        cls.header = {"Content-type": "application/json;charset=utf-8"}

    def setUp(self):
        # New session per case so cookies/state do not leak between tests.
        self.session = requests.session()

    def tearDown(self):
        self.session.close()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_login_data.json"))
    def test_login(self, case_name, request_body, success, code, message,
                   http_code):
        '''
        Login test.
        :return:
        '''
        print("*" * 30, "测试" + case_name, "*" * 30)
        response = self.request_api.doPost(self.session, self.login_url,
                                           request_body, headers=self.header)
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))
def students():
    """Render the students page grouped by membership category."""
    headers = [u'박사 수료', u'박사 과정', u'석사 과정', u'휴학생']
    member_keys = ['phd_candidates', 'phd_students', 'ms_students', 'on_leave']
    # Pair each display header with its JSON key for the template loop.
    header_key_pairs = zip(headers, member_keys)
    return render_template('students.html',
                           students=read_json_data('members.json'),
                           member_header_key_pairs=header_key_pairs)
def main():
    """Run per-aspect sentiment analysis over all reviews and dump results."""
    result = {}
    review_data = get_review_data()
    aspect_dict = read_aspect_dict()
    review_sentences, review_info = gen_review_processed_data(
        review_data, aspect_dict)
    all_data = read_json_data(JSON_PATH)
    # Pre-build the result skeleton: one empty list per (aspect, review index).
    for key, val in review_data.items():
        result[key] = {}
        result[key]['citation'] = all_data[key]['citation']
        result[key]['reviews'] = {}
        for aim in aim_list:
            result[key]['reviews'][aim] = []
            for _ in range(len(val)):
                result[key]['reviews'][aim].append([])
    for aim in aim_list:
        print(aim)
        # Load the aspect-specific weights, then classify that aspect's
        # sentences; review_info maps each prediction back to its review.
        sentiment_predictor.load_weights(os.path.join(SENTENCE_MODEL_PATH, aim))
        result_label = analyse(review_sentences[aim])
        for predict_result, info in zip(result_label, review_info[aim]):
            key, review_index = info[0], info[1]
            result[key]['reviews'][aim][review_index].append(predict_result)
    with open(os.path.join(RESULT_PATH, 'token_result.json'), 'w') as f:
        json.dump(result, f)
def get_review_data():
    """Return {key: reviews} for every entry in JSON_PATH that has reviews."""
    all_data = read_json_data(JSON_PATH)
    return {
        key: entry['reviews']
        for key, entry in all_data.items()
        if 'reviews' in entry
    }
def _vi_to_zalo():
    """Convert translated SQuAD v2.0 files into the Zalo QnA sample format."""
    squad_dir = "squad_data"
    zalo_samples = []
    _id = 0
    for file_name in ["vi_train-v2.0.json", "vi_dev-v2.0.json"]:
        samples = read_json_data("{}/{}".format(squad_dir, file_name))
        for sample in samples["data"]:
            title = sample["title"]
            for paragraph in sample["paragraphs"]:
                context = paragraph["context"]
                for qa in paragraph["qas"]:
                    # Answerable questions become positive samples.
                    zalo_samples.append({
                        "id": "squad-{}".format(_id),
                        "title": title,
                        "question": qa["question"],
                        "text": context,
                        "label": not qa["is_impossible"],
                    })
                    _id += 1
    out_path = "qna_data/squad.json"
    write_json_data(out_path, zalo_samples)
    print ("Write file {}".format(out_path))
def from_json(cls, file_path):
    """Rebuild a vocab instance from a JSON file produced by save_json."""
    data = read_json_data(file_path)
    return cls(word2id=data["word2id"], max_sent_len=data["max_sent_len"])
def _vi_to_qnli(dataset, lang="vi"):
    """Flatten a SQuAD v2.0 file into a QNLI-style tsv for the given language."""
    squad_data_folder = "squad_data"
    json_path = "{}/{}_{}-v2.0.json".format(squad_data_folder, lang, dataset)
    json_examples = read_json_data(json_path)
    rows = {"index": [], "question": [], "sentence": [], "label": []}
    id_idx = 0
    for example in json_examples["data"]:
        for paragraph in example["paragraphs"]:
            context = paragraph["context"]
            for qa in paragraph["qas"]:
                rows["index"].append(id_idx)
                rows["question"].append(_simple_preprocess(qa["question"]))
                rows["sentence"].append(_simple_preprocess(context))
                rows["label"].append("not_entailment" if qa["is_impossible"]
                                     else "entailment")
                id_idx += 1
    df = pd.DataFrame(rows)
    tsv_path = "{}/{}_{}.tsv".format(squad_data_folder, lang, dataset)
    _write_tsv(df, tsv_path)
    print ("Write file {}".format(tsv_path))
def cal_ave_citation_with_sentiment():
    """Print the average citation count per aspect, split by sentiment label."""
    data_path = os.path.join(RESULT_DIR, 'processed_token_result.json')
    data = read_json_data(data_path)
    # res[aim]['citation'][label] accumulates citations; res[aim]['count'][label]
    # counts papers, for labels 0 (negative) and 1 (positive).
    res = {}
    for aim in aim_list:
        res[aim] = {
            'citation': {label: 0 for label in range(2)},
            'count': {label: 0 for label in range(2)},
        }
    for entry in data.values():
        for aspect, emotion in entry['reviews'].items():
            if emotion < 0:
                # Negative marker means "no sentiment for this aspect".
                continue
            res[aspect]['citation'][emotion] += entry['citation']
            res[aspect]['count'][emotion] += 1
    print(res)
    for key, val in res.items():
        print(key)
        for label in range(2):
            if label == 0:
                print('负面情感平均引用')
            else:
                print('正面情感平均引用')
            if val['count'][label] == 0:
                print('zero count')
            else:
                print(val['citation'][label] / val['count'][label])
def load_qna_data(cls, method="normal", build_data=True, mode="train"):
    """Build a dataset object from the method-specific train/test JSON files.

    :param method: preprocessing method; selects the method-prefixed keys in
        each JSON sample (resolved via get_method_key).
    :param build_data: when True, call dataset.build_data() before returning.
    :param mode: "train" loads both train and test files; any other value
        loads only the test file (train lists stay empty).
    :return: a cls instance populated with train/test texts, labels and ids.
    """
    train_file = "qna_data/{}_train.json".format(method)
    test_file = "qna_data/{}_test.json".format(method)
    train_question_texts, train_paragraph_texts = [], []
    train_labels = []
    train_titles = []
    test_question_texts, test_paragraph_texts = [], []
    test_q_p_ids = []
    test_json = read_json_data(test_file)
    # Resolve the preprocessed key names once for this method.
    pre_title_key = get_method_key("title", method)
    pre_question_key = get_method_key("question", method)
    pre_text_key = get_method_key("text", method)
    pre_paragraphs_key = get_method_key("paragraphs", method)
    if mode == "train":
        train_json = read_json_data(train_file)
        for train_sample in train_json:
            train_question_texts.append(train_sample[pre_question_key])
            train_paragraph_texts.append(train_sample[pre_text_key])
            train_labels.append(train_sample["label"])
            train_titles.append(train_sample[pre_title_key])
    # Test samples fan out: one (question, paragraph) pair per paragraph,
    # tracked by (__id__, paragraph id) for later score aggregation.
    for test_sample in test_json:
        for p in test_sample[pre_paragraphs_key]:
            test_question_texts.append(test_sample[pre_question_key])
            test_paragraph_texts.append(p["text"])
            test_q_p_ids.append((test_sample["__id__"], p["id"]))
    dataset = cls(
        train_question_texts=train_question_texts,
        train_paragraph_texts=train_paragraph_texts,
        train_labels=train_labels,
        train_titles=train_titles,
        test_question_texts=test_question_texts,
        test_paragraph_texts=test_paragraph_texts,
        test_q_p_ids=test_q_p_ids,
        method=method,
    )
    if build_data:
        dataset.build_data()
    return dataset
def get(self):
    """Serve the raw config file as a download; 404 when missing or empty."""
    config_data = utils.read_json_data(self.config_file)
    if not config_data:
        abort(404, message="No valid config file found.")
    directory, filename = os.path.split(self.config_file)
    # NOTE(review): attachment_filename was renamed download_name in
    # Flask 2.0 — confirm which Flask version this project pins.
    return send_from_directory(directory, filename,
                               attachment_filename=filename)
def _gen_wrong_dev(model_dir, details_file_name, json_data_path):
    """Split a details CSV into wrong/right prediction detail files."""
    details_df = pd.read_csv("{}/{}.csv".format(model_dir, details_file_name))
    data_dict = read_json_data(json_data_path)
    # correct == 0 -> "_wrong.csv", correct == 1 -> "_right.csv"
    for tag, correct_flag in (("wrong", 0), ("right", 1)):
        out_path = "{}/{}_{}.csv".format(model_dir, details_file_name, tag)
        subset = details_df.loc[details_df["correct"] == correct_flag]
        _write_dev_details(subset, out_path, data_dict)
def _from_json(self):
    """Restore vocab and cached dataset arrays from their JSON files."""
    prefix = "qna_data/{}".format(self.method)
    self.vocab = VocabEntry.from_json("{}_vocab.json".format(prefix))
    dataset_json = read_json_data("{}_dataset.json".format(prefix))
    # Hydrate each cached train attribute straight from the JSON payload.
    for key in self.train_keys:
        setattr(self, key, dataset_json[key])
    self._to_numpy()
def gen_datasets_token():
    """Tokenize each paper's reviews into sentences and dump the result."""
    result_file = os.path.join(RESULT_DIR, 'old-dataset_token.json')
    data = read_json_data(os.path.join(DATASET_DIR, 'old-dataset-all/all.json'))
    for entry in data.values():
        reviews = entry.get('reviews', None)
        if not reviews:
            continue
        # Replace each raw review with its sentence-token list.
        entry['reviews'] = [
            split_paragraph_into_sentence(review) for review in reviews
        ]
    with open(result_file, 'w') as f:
        json.dump(data, f)
class TestLogin(unittest.TestCase):
    """Data-driven login tests using the LoginApi helper and assert_text."""

    @classmethod
    def setUpClass(cls):
        cls.login_api = LoginApi()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/login_data.json"))
    def test_01login(self, case_name, request_body, success, code, message,
                     http_code):
        json_data = request_body
        headers = {"Content-Type": "application/json"}
        response = self.login_api.login(json_data=json_data, headers=headers)
        # Delegate all response assertions to the shared helper.
        assert_text(self, http_code, success, code, message, response)
def _zalo_to_glue(file_name, pre_method="normal_cased"):
    """Convert qna_data/<file_name>.json into a GLUE-style tsv.

    file_name has the form "<lang>_<dataset_type>" (e.g. "vi_train");
    pre_method selects the preprocessed question/text keys to read.
    """
    parts = file_name.split("_")
    lang = parts[0]
    dataset_type = parts[1]
    json_file = "qna_data/{}.json".format(file_name)
    tsv_file = "qna_data/glue_data/{}/final/{}.tsv".format(lang, dataset_type)
    json_samples = read_json_data(json_file)
    glue_dict = {
        "index": [],
        "question": [],
        "sentence": [],
        "label": [],
        "pid": [],
    }
    id_pids = []
    for idx, json_sample in enumerate(json_samples):
        glue_dict["index"].append(idx)
        glue_dict["question"].append(
            _pre_process_question(
                json_sample["{}_question".format(pre_method)]))
        glue_dict["sentence"].append(
            _pre_process_sentence(json_sample["{}_text".format(pre_method)]))
        glue_dict["label"].append(
            "entailment" if json_sample["label"] else "not_entailment")
        # "<id>@<pid>" uniquely identifies one question-paragraph pair.
        id_pid = "{}@{}".format(json_sample["id"], json_sample["pid"])
        id_pids.append(id_pid)
        glue_dict["pid"].append(id_pid)
    glue_df = pd.DataFrame(glue_dict)
    # 90/10 splits only for the two training variants.
    if file_name == "vi_train":
        _split_train("qna_data/glue_data/vi/final", glue_df, "train90",
                     "dev10")
    if file_name == "vi_btrain":
        _split_train("qna_data/glue_data/vi/final", glue_df, "btrain90",
                     "bdev10")
    # divide in into k folds here
    # _split_k_folds(glue_df, lang)
    if "test" in file_name or "private" in file_name:
        # Persist pair ids so predictions can be mapped back to samples.
        _write_pids(id_pids, lang, dataset_type)
    _write_tsv(glue_df, tsv_file)
def preprocess_qna_data(
    self,
    method,
    bert_type,
    dataset_types,
):
    """Preprocess en_<type>.json samples in place and optionally emit features.

    For each dataset type, adds "<method>_<bert_type>_question/text" keys to
    every sample and rewrites the JSON file. When self.for_train is set, also
    collects token-id feature columns and writes them out.
    """
    for dataset_type in dataset_types:
        data_file = "qna_data/en_{}.json".format(dataset_type)
        # Init features columns (only needed in training mode; every later
        # use is guarded by the same self.for_train flag).
        if self.for_train:
            features_columns = {
                "id": [],
                "question": [],
                "text": [],
                "label": [],
                "pid": [],
            }
        json_samples = read_json_data(data_file)
        for json_sample in json_samples:
            if self.for_train:
                features_columns["id"].append(json_sample["id"])
                features_columns["label"].append(1 if json_sample["label"]
                                                 else 0)
                features_columns["pid"].append(json_sample["pid"])
            for key in ["question", "text"]:
                pre_key = "{}_{}_{}".format(
                    method, bert_type, key
                )
                # pre_process_text yields the cleaned text plus token ids.
                pre_text, tokens_id = self.pre_process_text(
                    json_sample[key], method, self.for_train
                )
                json_sample[pre_key] = pre_text
                if self.for_train:
                    features_columns[key].append(tokens_id)
        # samples with preprocessed keys
        write_json_data(data_file, json_samples)
        print ("{}. Length {}. Done write to file {}".format(
            dataset_type, len(json_samples), data_file
        ))
        # generate featured dataset
        if self.for_train:
            folder_name = "{}_{}".format(method, bert_type)
            self.write_features_columns(
                features_columns, folder_name, dataset_type
            )
def gen_bar():
    """Plot, per aspect, positive/negative sentiment share by citation bucket."""
    data = read_json_data(DATA_PATH)
    cite_count = []
    sentiment_count_pos = {}
    sentiment_count_neg = {}
    sentiment_count_all_pos = {}
    sentiment_count_all_neg = {}
    for aim in aim_list:
        sentiment_count_pos[aim] = []
        sentiment_count_neg[aim] = []
        sentiment_count_all_neg[aim] = []
        sentiment_count_all_pos[aim] = []
    # Bucket citations into [0, 500] in steps of 50.
    upper_limit = 500
    interval = 50
    button_limit = 0
    name_list = range(0, upper_limit + interval, interval)
    # One zero-initialized slot per bucket, per aspect.
    for _ in range((upper_limit // interval) + 1):
        cite_count.append(0)
        for aim in aim_list:
            sentiment_count_pos[aim].append(0)
            sentiment_count_neg[aim].append(0)
            sentiment_count_all_neg[aim].append(0)
            sentiment_count_all_pos[aim].append(0)
    for key, val in data.items():
        # Skip papers outside the citation window.
        if val['citation'] > upper_limit or val['citation'] < button_limit:
            continue
        for aim in aim_list:
            # Negative marker means "no sentiment for this aspect".
            if val['reviews'][aim] < 0:
                continue
            if val['reviews'][aim] > 0:
                sentiment_count_pos[aim][val['citation'] // interval] += 1
            else:
                sentiment_count_neg[aim][val['citation'] // interval] += 1
    # Normalize counts to per-bucket fractions; negatives are negated so
    # they render below the x-axis.
    for aim in aim_list:
        for _ in range((upper_limit // interval) + 1):
            if sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_] != 0:
                sentiment_count_all_pos[aim][_] = sentiment_count_pos[aim][_] / (
                    sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_])
            if sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_] != 0:
                sentiment_count_all_neg[aim][_] = -sentiment_count_neg[aim][_] / (
                    sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_])
    for aim in aim_list:
        plt.bar(range(upper_limit // interval + 1),
                sentiment_count_all_pos[aim],
                fc='g',
                tick_label=name_list)
        plt.bar(range(upper_limit // interval + 1),
                sentiment_count_all_neg[aim],
                fc='r',
                tick_label=name_list)
        plt.title(aim)
        plt.show()
def __init__(self,
             lang,
             data_folder,
             model_type,
             lr,
             current_epoch=0,
             input_feature_cols=["input_features"]):
    """Set up the model, optimizer and checkpoint/report folders.

    NOTE(review): the mutable list default for input_feature_cols is shared
    across calls; and self.report_folder is only assigned when
    current_epoch > 0 — confirm downstream code never reads it otherwise.
    """
    self.lang = lang
    self.data_folder = data_folder
    self.model_type = model_type
    self.current_epoch = current_epoch
    self.input_feature_cols = input_feature_cols
    # bert model path for en
    if lang == "en":
        bert_model_path, _, _ = get_bert_paths(data_folder.split("_")[-1])
    else:
        bert_model_path = None
    # combine from data_folder and model_type
    save_folder = "{}_{}".format(model_type, data_folder)
    self.checkpoint_path = "models/{}".format(save_folder)
    create_folder(self.checkpoint_path)
    saved_epoch_path = None
    if current_epoch > 0:
        # Resume from the previous epoch's checkpoint.
        saved_epoch = current_epoch - 1
        saved_epoch_path = "{}/{}/{}".format(self.checkpoint_path,
                                             saved_epoch, saved_epoch)
        # create report folder
        self.report_folder = "reports/{}/{}".format(
            save_folder, saved_epoch)
        create_folder(self.report_folder)
    # get configs
    configs_file = "qna_data/pre_data/{}/configs.json".format(data_folder)
    configs = read_json_data(configs_file)
    self.model = get_model(lang=lang,
                           model_type=model_type,
                           bert_model_path=bert_model_path,
                           saved_epoch_path=saved_epoch_path,
                           configs=configs)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
def process_old_sentiment_data():
    """Collapse per-review sentiment lists into one final label per aspect."""
    data_path = os.path.join(RESULT_DIR, 'old_all_info.json')
    result_path = os.path.join(RESULT_DIR, 'processed_old_all_info.json')
    data = read_json_data(data_path)
    res = {}
    for key, val in data.items():
        # Map each raw sentiment, then reduce the list to a single label.
        reviews = {
            aspect: get_final_sentiment(
                [get_sentiment(sentiment) for sentiment in sentiments])
            for aspect, sentiments in val['sentiment'].items()
        }
        res[key] = {'citation': val['citation'], 'reviews': reviews}
    with open(result_path, 'w') as f:
        json.dump(res, f)
def _build_vocab(self, vocab_file, method, cased):
    """Build a vocab from preprocessed train/test texts and save it as JSON."""
    corpus = []
    for dataset_type in ("train", "test"):
        samples = read_json_data("qna_data/vi_{}.json".format(dataset_type))
        for sample in samples:
            for key in ("question", "text"):
                # Read the "<method>_<cased>_<key>" preprocessed field.
                pre_key = "{}_{}_{}".format(method, cased, key)
                corpus.append(sample[pre_key].split())
    self.vocab = VocabEntry.from_corpus(corpus, freq_cutoff=3)
    self.vocab.save_json(vocab_file)
    print ("Save vocab to file {}".format(vocab_file))
def activities():
    # return redirect("https://sites.google.com/a/dm.snu.ac.kr/snudm_seminar/")
    """Render the activities page.

    NOTE(review): the block below is disabled legacy code for seminar-file
    uploads, kept verbatim for reference:

    if request.method =='POST':
        if 'file' not in request.files:
            flash('파일이 없습니다.')
            return redirect(request.url)
        file = request.files['file']
        if file.filename =='':
            flash('선택된 파일이 없습니다.')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            filename = datetime.today().strftime('%Y%m%d')+'-'+filename
            file.save(os.path.join(app.config['SEMINAR_FORDER'],filename))
            return render_template('activities.html',
                                   seminar_data = read_seminar_data())
    """
    return render_template('activities.html',
                           activities=read_json_data('activities.json'))
def _squad_to_examples(squad_json_file, start_id=0):
    """Flatten a SQuAD v2.0 file into parallel id/question/sentence/label lists."""
    json_data = read_json_data(squad_json_file)
    ids, questions, sentences, labels = [], [], [], []
    for article in json_data["data"]:
        for paragraph in article["paragraphs"]:
            context = paragraph["context"]
            for qa in paragraph["qas"]:
                ids.append(start_id)
                questions.append(_pre_process_question(qa["question"]))
                sentences.append(_pre_process_sentence(context))
                labels.append(
                    "not_entailment" if qa["is_impossible"] else "entailment")
                start_id += 1
    return ids, questions, sentences, labels
def get_titles():
    """Collect unique titles from the QnA train/test data into titles.txt."""
    dataset_types = ["train", "test"]
    titles_file = "qna_data/wiki_data/titles.txt"
    with open(titles_file, "r", encoding="utf-8") as f:
        # Trailing newline produces an empty last element; drop it.
        titles = f.read().split("\n")[:-1]
    print("Number of titles before: ", len(titles))
    # Set for O(1) membership — the list scan was O(n) per title, O(n^2) total.
    seen = set(titles)
    for dataset_type in dataset_types:
        json_samples = read_json_data(
            "qna_data/vi_{}.json".format(dataset_type))
        for json_sample in json_samples:
            # NOTE(review): replace(" ", " ") replaces a space with a space —
            # possibly a mangled non-breaking-space normalization; confirm.
            title = json_sample["title"].strip().replace(" ", " ")
            if title and title not in seen:
                titles.append(title)
                seen.add(title)
    print("Number of titles after: ", len(titles))
    with open(titles_file, "w", encoding="utf-8") as f:
        for title in titles:
            f.write("{}\n".format(title))
def convert_data(dataset_type, include_txt=True):
    """Normalize raw QnA JSON into flat (id, title, question, text, label, pid) samples."""
    file_path = "qna_data/{}.json".format(dataset_type)
    data_json = read_json_data(file_path)
    converted_samples = []
    for sample_json in data_json:
        if dataset_type in ("train", "squad"):
            # Train-style samples are already flat; tag a fixed paragraph id.
            sample_json["pid"] = "p1"
            converted_samples.append(sample_json)
        elif dataset_type in ("test", "private", "ltest"):
            # Test-style samples fan out: one sample per paragraph.
            for p in sample_json["paragraphs"]:
                converted_samples.append({
                    "id": sample_json["__id__"],
                    "title": sample_json["title"],
                    "question": sample_json["question"],
                    # Missing label defaults to False, matching '1' -> True.
                    "label": p.get('label') == '1',
                    "text": p["text"],
                    "pid": p["id"],
                })
    new_file_path = "qna_data/vi_{}.json".format(dataset_type)
    write_json_data(new_file_path, converted_samples)
    print ("Length {}. Done write to file {}".format(len(converted_samples),
                                                     new_file_path))
    write_txt_for_translation(converted_samples, dataset_type)
    # write only vi files
    print ("Done write raw files for translation")
def convert_raw_en_to_json(dataset_type):
    """Re-assemble back-translated English lines into vi_<type>.json samples.

    Expects a dataset_type whose original counterpart is dataset_type[1:]
    (e.g. "btest" -> "test"); titles are copied from that original file.
    """
    raw_id_type_file = "qna_data/back_tran/raw_id_type_{}.txt".format(dataset_type)
    raw_en_file = "qna_data/back_tran/raw_vi_{}.txt".format(dataset_type)
    en_file = "qna_data/vi_{}.json".format(dataset_type)
    # for getting the title only
    vi_json_file = "qna_data/vi_{}.json".format(dataset_type[1:])
    vi_json_samples = read_json_data(vi_json_file)
    en_json_samples = []
    id_lines = [line.strip() for line in open(raw_id_type_file, "r", encoding="utf-8")]
    en_lines = [line.strip() for line in open(raw_en_file, "r", encoding="utf-8")]
    current_question = None
    text_idx = 0
    # id_lines and en_lines are parallel: a "question" row starts a sample,
    # each following "text" row pairs that question with one paragraph.
    for i, id_line in enumerate(id_lines):
        parts = id_line.split("\t")
        if parts[1] == "question":
            current_question = {
                "id": parts[0],
                "question": en_lines[i],
            }
        elif parts[1] == "text":
            en_json_sample = copy.deepcopy(current_question)
            # text_idx tracks position in the original vi file — assumes the
            # raw files preserve the vi sample order (TODO confirm).
            en_json_sample["title"] = vi_json_samples[text_idx]["title"]
            en_json_sample["text"] = en_lines[i]
            en_json_sample["label"] = True if parts[3] == "True" else False
            en_json_sample["pid"] = parts[2]
            en_json_samples.append(en_json_sample)
            text_idx += 1
    write_json_data(en_file, en_json_samples)
    print ("{}. Length {}. Done write to file {}".format(
        dataset_type, len(en_json_samples), en_file
    ))
def alumni():
    """Render the alumni page from alumni.json."""
    data = read_json_data('alumni.json')
    return render_template('alumni.html', alumni=data)
def admission():
    """Render the admission page from admission.json."""
    data = read_json_data('admission.json')
    return render_template('admission.html', admission=data)
def software():
    """Render the software page from software.json."""
    data = read_json_data('software.json')
    return render_template('software.html', software=data)
def topics():
    """Render the topics page with the shared menu bar."""
    return render_template('topics.html',
                           menus=MENUS,
                           topics=read_json_data('topics.json'))
def members():
    """Render the members page, including PhD/MS alumni tables."""
    context = {
        "menus": MENUS,
        "members": read_json_data('members.json'),
        "alumni_phd": read_csv_data('alumni_phd.csv'),
        # The MS alumni file is tab-separated.
        "alumni_ms": read_csv_data('alumni_ms.tsv', sep='\t'),
    }
    return render_template('members.html', **context)
def courses():
    """Render the courses page from courses.json."""
    data = read_json_data('courses.json')
    return render_template('courses.html', courses=data)
def faq():
    """Render the FAQ page with the shared menu bar."""
    return render_template('faq.html',
                           menus=MENUS,
                           faq=read_json_data('faq.json'))
def sponsors():
    """Render the sponsors page (template receives data as 'topics')."""
    data = read_json_data('sponsors.json')
    return render_template('sponsors.html', topics=data)
def datamining():
    """Render the data-mining page from datamining.json."""
    data = read_json_data('datamining.json')
    return render_template('datamining.html', datamining=data)
def phd():
    """Render the PhD degrees page from degrees.json."""
    data = read_json_data('degrees.json')
    return render_template('degrees_phd.html', phd=data)
def education():
    """Render the education page from education.json."""
    data = read_json_data('education.json')
    return render_template('education.html', educations=data)
def members():
    """Render the members page with current members and alumni."""
    return render_template('members.html',
                           members=read_json_data('members.json'),
                           alumni=read_json_data('alumni.json'))
def faq():
    """Render the FAQ page; log the matched view args for debugging."""
    # The original Python 2 `print request.view_args` statement is a
    # SyntaxError under Python 3 — use the print() function instead.
    print(request.view_args)
    return render_template('faq.html',
                           faq=read_json_data('faq.json'))
def admission():
    """Render the degrees Q&A page (template receives degrees.json as 'admission')."""
    data = read_json_data('degrees.json')
    return render_template('degrees_qna.html', admission=data)
def masters():
    """Render the masters degrees page from degrees.json."""
    data = read_json_data('degrees.json')
    return render_template('degrees_masters.html', masters=data)