Example #1
0
def home():
    """Render the localized home page, or 404 for unsupported locales."""
    if g.current_lang not in LOCALES:
        return abort(404)
    datamining = read_json_data('datamining.json')
    news = read_json_data('news.json')
    return render_template('home.html', datamining=datamining, news=news)
Example #2
0
def home():
    """Serve home.html when the current language is supported; 404 otherwise."""
    if g.current_lang in LOCALES:
        context = {
            'datamining': read_json_data('datamining.json'),
            'news': read_json_data('news.json'),
        }
        return render_template('home.html', **context)
    return abort(404)
Example #3
0
class TestEmp(unittest.TestCase):
    """Employee-API integration tests: log in first, then list users.

    The test methods are numbered because test_02 relies on the bearer
    token that test_01 stores in ``app.TOKEN``.
    """

    @classmethod
    def setUpClass(cls):
        # One HTTP session shared by all cases in this class.
        cls.session = requests.session()
        cls.login_url = app.PROJECT_URL + "/api/sys/login"
        cls.get_emp_url = app.PROJECT_URL + "/api/sys/user"
        cls.request_api = RequestApi()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_emp_data.json",
                       method_name="test_login"))
    def test_01_login(self, case_name, request_body, success, code, message,
                      http_code):
        '''
        Login test: POST the case body, keep the returned bearer token for
        later tests, then check the response message and code.
        :return:
        '''

        print("*" * 30, "测试" + case_name, "*" * 30)
        header = {"Content-type": "application/json;charset=utf-8"}
        response = self.request_api.doPost(self.session,
                                           self.login_url,
                                           request_body,
                                           headers=header)
        # Save the token so test_02_get_user can authenticate.
        app.TOKEN = "Bearer " + response.json().get("data")
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_emp_data.json",
                       method_name="test_emp"))
    def test_02_get_user(self, case_name, params, success, code, message,
                         http_code):
        '''
        Fetch all users with the token captured during login.
        :param case_name: human-readable case label
        :param params: query parameters for the user listing
        :param success: expected success flag (not asserted here)
        :param code: expected business code
        :param message: expected substring of the response message
        :param http_code: expected HTTP status (not asserted here)
        :return:
        '''
        print("*" * 30, "测试" + case_name, "*" * 30)
        headers = {"Authorization": app.TOKEN}
        response = self.request_api.doGet(self.session,
                                          self.get_emp_url,
                                          params=params,
                                          headers=headers)
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))
Example #4
0
    def post(self):
        """Update the JSON config file from the parsed request arguments.

        Each non-empty argument value must be a ``parameter:value`` pair;
        the argument name selects the config category to update.  Returns
        the updated config and 200, or aborts with 404 on a missing config
        file, unknown category, or malformed pair.
        """
        config_data = utils.read_json_data(self.config_file)
        if not config_data:
            abort(404, message="No valid config file found.")

        args = config_parser.parse_args()

        for key, value in dict(args).items():
            # Unset arguments are skipped rather than clearing config.
            if not value:
                continue

            if key not in config_data:
                abort(404, message="Category {} is not valid.".format(key))

            pair = value.split(':')
            if len(pair) != 2:
                abort(
                    404,
                    message=
                    "No valid config value provided. Format is str(parameter:value)."
                )

            config_data[key][pair[0]] = pair[1]

        utils.write_json(config_data, self.config_file)

        return config_data, 200
Example #5
0
def _to_translate(dataset, fold=1):
    """Split QNLI questions/sentences JSON into chunked raw-text and id
    files ready for translation.

    :param dataset: dataset name used in the glue_data/qnli paths
    :param fold: number of chunks each JSON file is split into
    """
    json_files = ["questions", "sentences"]

    for json_file in json_files:
        json_path = "glue_data/qnli/{}_{}.json".format(dataset, json_file)
        json_samples = read_json_data(json_path)
        n = len(json_samples)
        # Guard against fold > n, which would make the range() step zero
        # and raise ValueError in the original.
        split_num = max(1, n // fold)

        for i in range(0, n, split_num):
            split_above = min(i + split_num, n)
            raw_text_path = "glue_data/qnli/en/{}_{}_raw_{}_{}.txt".format(
                dataset, json_file, i, split_above)
            ids_path = "glue_data/qnli/en/{}_{}_ids_{}_{}.txt".format(
                dataset, json_file, i, split_above)

            # Context managers close both files even if a missing key
            # raises below (the original leaked the handles then).
            with open(raw_text_path, "w", encoding="utf-8") as raw_f, \
                    open(ids_path, "w", encoding="utf-8") as ids_f:
                for j in range(i, split_above):
                    raw_f.write("{}\n".format(json_samples[str(j)]))
                    ids_f.write("{}\n".format(j))
Example #6
0
def do_notes(in_dir):
    """
        Read tracks info inside the in_dir folder and write
        tracks info to the tracks.json file

        :param in_dir: Input dir

        :return None
    """
    # create or read tracks json
    out_file = f"{in_dir}/tracks.json"
    if os.path.exists(out_file):
        out_json = read_json_data(out_file)
    else:
        out_json = {"instruments": {}}

    # we update this object
    instruments_json = out_json["instruments"]

    # get instruments info and update tracks json
    for path in glob.glob(f"{in_dir}/scores/*.mid"):
        # os.path.basename honors the platform's separator; the original
        # split on "\\", which returned the full path on POSIX systems.
        instrument_name = os.path.basename(path).replace(".mid", "")
        instrument_info = get_instrument_info(path)

        instruments_json[instrument_name] = instrument_info

    # write tracks json
    write_json_data(out_file, out_json)
    print(f"Write file {out_file}")
Example #7
0
class TestLogin(unittest.TestCase):
    """Parameterized login tests against the /api/sys/login endpoint."""

    @classmethod
    def setUpClass(cls):
        # Request-independent fixtures shared by all cases.
        cls.login_url = app.PROJECT_URL + "/api/sys/login"
        cls.request_api = RequestApi()
        cls.header = {"Content-type": "application/json;charset=utf-8"}

    def setUp(self):
        # Fresh session per case so cases cannot leak cookies to each other.
        self.session = requests.session()

    def tearDown(self):
        self.session.close()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/test_login_data.json"))
    def test_login(self, case_name, request_body, success, code, message,
                   http_code):
        '''
        Login test: POST the case body and check the response message/code.
        :return:
        '''
        print("*" * 30, "测试" + case_name, "*" * 30)
        response = self.request_api.doPost(self.session,
                                           self.login_url,
                                           request_body,
                                           headers=self.header)
        self.assertIn(message, response.json().get("message"))
        self.assertEqual(code, response.json().get("code"))
Example #8
0
def students():
    """Render the students page grouped by membership category."""
    headers = [u'박사 수료', u'박사 과정', u'석사 과정', u'휴학생']
    member_keys = ['phd_candidates', 'phd_students', 'ms_students', 'on_leave']
    pairs = zip(headers, member_keys)
    return render_template('students.html',
                           students=read_json_data('members.json'),
                           member_header_key_pairs=pairs)
def main():
    # Run per-aspect sentiment prediction over every review sentence and
    # collect the predictions per paper / aspect / review index.
    result = {}
    review_data = get_review_data()
    aspect_dict = read_aspect_dict()
    review_sentences, review_info = gen_review_processed_data(
        review_data, aspect_dict)
    all_data = read_json_data(JSON_PATH)

    # Pre-build the result skeleton: one empty prediction list per review,
    # per aspect in aim_list.
    for key, val in review_data.items():
        result[key] = {}
        result[key]['citation'] = all_data[key]['citation']
        result[key]['reviews'] = {}
        for aim in aim_list:
            result[key]['reviews'][aim] = []
            for _ in range(len(val)):
                result[key]['reviews'][aim].append([])

    for aim in aim_list:
        print(aim)
        # Each aspect has its own fine-tuned weights.
        sentiment_predictor.load_weights(os.path.join(SENTENCE_MODEL_PATH,
                                                      aim))
        result_label = analyse(review_sentences[aim])
        # review_info pairs each sentence with (paper key, review index).
        for predict_result, info in zip(result_label, review_info[aim]):
            key, review_index = info[0], info[1]
            result[key]['reviews'][aim][review_index].append(predict_result)

    with open(os.path.join(RESULT_PATH, 'token_result.json'), 'w') as f:
        json.dump(result, f)
def get_review_data():
    """Return {paper_key: reviews} for every entry that has reviews."""
    all_data = read_json_data(JSON_PATH)
    return {key: val['reviews']
            for key, val in all_data.items()
            if 'reviews' in val}
Example #11
0
def _vi_to_zalo():
    """Convert translated SQuAD v2.0 files into Zalo-format samples and
    write them to qna_data/squad.json."""
    squad_dir = "squad_data"
    zalo_samples = []
    for file_name in ["vi_train-v2.0.json", "vi_dev-v2.0.json"]:
        data = read_json_data("{}/{}".format(squad_dir, file_name))
        for article in data["data"]:
            for paragraph in article["paragraphs"]:
                for qa in paragraph["qas"]:
                    # len(zalo_samples) yields the same running counter
                    # as an explicit _id incremented per appended sample.
                    zalo_samples.append({
                        "id": "squad-{}".format(len(zalo_samples)),
                        "title": article["title"],
                        "question": qa["question"],
                        "text": paragraph["context"],
                        "label": not qa["is_impossible"],
                    })

    out_path = "qna_data/squad.json"
    write_json_data(out_path, zalo_samples)
    print("Write file {}".format(out_path))
Example #12
0
    def from_json(cls, file_path):
        """Alternate constructor: load vocab fields from a JSON file."""
        data = read_json_data(file_path)
        return cls(word2id=data["word2id"],
                   max_sent_len=data["max_sent_len"])
Example #13
0
def _vi_to_qnli(dataset, lang="vi"):
    """Convert a SQuAD v2.0 json file into a QNLI-style TSV."""
    squad_data_folder = "squad_data"
    json_path = "{}/{}_{}-v2.0.json".format(squad_data_folder, lang, dataset)
    squad_json = read_json_data(json_path)

    questions, sentences, labels = [], [], []
    for article in squad_json["data"]:
        for paragraph in article["paragraphs"]:
            context = paragraph["context"]
            for qa in paragraph["qas"]:
                questions.append(_simple_preprocess(qa["question"]))
                sentences.append(_simple_preprocess(context))
                labels.append(
                    "not_entailment" if qa["is_impossible"] else "entailment")

    # Column order matters for the TSV layout: index, question, sentence, label.
    df = pd.DataFrame({
        "index": list(range(len(questions))),
        "question": questions,
        "sentence": sentences,
        "label": labels,
    })
    tsv_path = "{}/{}_{}.tsv".format(squad_data_folder, lang, dataset)
    _write_tsv(df, tsv_path)
    print("Write file {}".format(tsv_path))
Example #14
0
def cal_ave_citation_with_sentiment():
	"""Print the average citation count of papers grouped by aspect and
	sentiment polarity (0 = negative, 1 = positive)."""
	data_path = os.path.join(RESULT_DIR, 'processed_token_result.json')
	data = read_json_data(data_path)
	res = {}
	# res[aspect]['citation'][polarity] accumulates citation totals;
	# res[aspect]['count'][polarity] counts the contributing papers.
	for aim in aim_list:
		res[aim] = {}
		res[aim]['citation'] = {}
		res[aim]['count'] = {}
		for _ in range(2):
			res[aim]['citation'][_] = 0
			res[aim]['count'][_] = 0
	for key, val in data.items():
		for aspect, emotion in val['reviews'].items():
			# Negative values are skipped — presumably they mark
			# "no sentiment available"; confirm against the producer.
			if emotion < 0:
				continue
			res[aspect]['citation'][emotion] += val['citation']
			res[aspect]['count'][emotion] += 1
	print(res)
	for key, val in res.items():
		print(key)
		for _ in range(2):
			if _ == 0:
				print('负面情感平均引用')
			else:
				print('正面情感平均引用')
			if val['count'][_] == 0:
				print('zero count')
			else:
				print(val['citation'][_] / val['count'][_])
Example #15
0
    def load_qna_data(cls, method="normal", build_data=True, mode="train"):
        """Alternate constructor: assemble a dataset from qna_data files.

        :param method: preprocessing method; selects the file names and the
            method-prefixed keys inside each json sample
        :param build_data: when True, call build_data() before returning
        :param mode: "train" also loads the train file; any other value
            loads only the test file
        :return: a new dataset instance with text lists populated
        """
        train_file = "qna_data/{}_train.json".format(method)
        test_file = "qna_data/{}_test.json".format(method)

        train_question_texts, train_paragraph_texts = [], []
        train_labels = []
        train_titles = []
        test_question_texts, test_paragraph_texts = [], []
        test_q_p_ids = []

        test_json = read_json_data(test_file)

        # Sample keys differ per preprocessing method.
        pre_title_key = get_method_key("title", method)
        pre_question_key = get_method_key("question", method)
        pre_text_key = get_method_key("text", method)
        pre_paragraphs_key = get_method_key("paragraphs", method)

        if mode == "train":
            train_json = read_json_data(train_file)
            for train_sample in train_json:
                train_question_texts.append(train_sample[pre_question_key])
                train_paragraph_texts.append(train_sample[pre_text_key])
                train_labels.append(train_sample["label"])
                train_titles.append(train_sample[pre_title_key])

        # Test samples hold several candidate paragraphs per question; the
        # question text is repeated once per paragraph.
        for test_sample in test_json:
            for p in test_sample[pre_paragraphs_key]:
                test_question_texts.append(test_sample[pre_question_key])
                test_paragraph_texts.append(p["text"])
                test_q_p_ids.append((test_sample["__id__"], p["id"]))

        dataset = cls(
            train_question_texts=train_question_texts,
            train_paragraph_texts=train_paragraph_texts,
            train_labels=train_labels,
            train_titles=train_titles,
            test_question_texts=test_question_texts,
            test_paragraph_texts=test_paragraph_texts,
            test_q_p_ids=test_q_p_ids,
            method=method,
        )

        if build_data:
            dataset.build_data()

        return dataset
Example #16
0
    def get(self):
        """Return the config file as a downloadable attachment.

        Aborts with 404 when the file is missing or not valid JSON.
        """
        config_data = utils.read_json_data(self.config_file)
        if not config_data:
            abort(404, message="No valid config file found.")

        dirname = os.path.dirname(self.config_file)
        filename = os.path.basename(self.config_file)
        # NOTE(review): `attachment_filename` was renamed `download_name`
        # in Flask 2.0 — confirm the pinned Flask version still accepts it.
        return send_from_directory(dirname,
                                   filename,
                                   attachment_filename=filename)
Example #17
0
def _gen_wrong_dev(model_dir, details_file_name, json_data_path):
    """Split a details CSV into *_wrong.csv / *_right.csv by correctness."""
    details_df = pd.read_csv("{}/{}.csv".format(model_dir, details_file_name))
    data_dict = read_json_data(json_data_path)

    # Write the incorrect rows first, then the correct ones.
    for suffix, flag in (("wrong", 0), ("right", 1)):
        out_path = "{}/{}_{}.csv".format(model_dir, details_file_name, suffix)
        subset = details_df.loc[details_df["correct"] == flag]
        _write_dev_details(subset, out_path, data_dict)
Example #18
0
    def _from_json(self):
        """Restore the vocab and cached train arrays from their JSON files."""
        method = self.method
        self.vocab = VocabEntry.from_json(
            "qna_data/{}_vocab.json".format(method))

        dataset_json = read_json_data(
            "qna_data/{}_dataset.json".format(method))
        for key in self.train_keys:
            setattr(self, key, dataset_json[key])

        self._to_numpy()
Example #19
0
def gen_datasets_token():
	"""Sentence-tokenize every paper's reviews and dump the result."""
	result_file = os.path.join(RESULT_DIR, 'old-dataset_token.json')
	data = read_json_data(os.path.join(DATASET_DIR, 'old-dataset-all/all.json'))
	for key, val in data.items():
		reviews = val.get('reviews', None)
		if not reviews:
			continue
		data[key]['reviews'] = [
			split_paragraph_into_sentence(review) for review in reviews
		]
	with open(result_file, 'w') as f:
		json.dump(data, f)
class TestLogin(unittest.TestCase):
    """Data-driven login tests using LoginApi and a shared assert helper."""

    @classmethod
    def setUpClass(cls):
        cls.login_api = LoginApi()

    @parameterized.expand(
        read_json_data(app.BASE_DIR + "/data/login_data.json"))
    def test_01login(self, case_name, request_body, success, code, message,
                     http_code):
        # One generated case per entry in login_data.json.
        json_data = request_body
        headers = {"Content-Type": "application/json"}
        response = self.login_api.login(json_data=json_data, headers=headers)

        # Shared assertion helper checks status code and payload fields.
        assert_text(self, http_code, success, code, message, response)
Example #21
0
def _zalo_to_glue(file_name, pre_method="normal_cased"):
    """Convert a Zalo-format json file into a GLUE/QNLI-style TSV.

    ``file_name`` is ``<lang>_<dataset_type>`` (e.g. ``vi_train``); the
    preprocessed question/text keys inside each sample are selected by
    ``pre_method``.  Train files additionally get a 90/10 split;
    test/private files also get an id@pid index file for later scoring.
    """
    parts = file_name.split("_")
    lang = parts[0]
    dataset_type = parts[1]
    json_file = "qna_data/{}.json".format(file_name)
    tsv_file = "qna_data/glue_data/{}/final/{}.tsv".format(lang, dataset_type)

    json_samples = read_json_data(json_file)

    glue_dict = {
        "index": [],
        "question": [],
        "sentence": [],
        "label": [],
        "pid": [],
    }
    id_pids = []

    for idx, json_sample in enumerate(json_samples):
        glue_dict["index"].append(idx)
        glue_dict["question"].append(
            _pre_process_question(
                json_sample["{}_question".format(pre_method)]))
        glue_dict["sentence"].append(
            _pre_process_sentence(json_sample["{}_text".format(pre_method)]))
        glue_dict["label"].append(
            "entailment" if json_sample["label"] else "not_entailment")
        # "id@pid" uniquely names a (question, paragraph) pair.
        id_pid = "{}@{}".format(json_sample["id"], json_sample["pid"])
        id_pids.append(id_pid)
        glue_dict["pid"].append(id_pid)

    glue_df = pd.DataFrame(glue_dict)

    if file_name == "vi_train":
        _split_train("qna_data/glue_data/vi/final", glue_df, "train90",
                     "dev10")

    # The back-translated train set gets its own split file names.
    if file_name == "vi_btrain":
        _split_train("qna_data/glue_data/vi/final", glue_df, "btrain90",
                     "bdev10")

    # divide in into k folds here
    # _split_k_folds(glue_df, lang)

    if "test" in file_name or "private" in file_name:
        _write_pids(id_pids, lang, dataset_type)

    _write_tsv(glue_df, tsv_file)
Example #22
0
    def preprocess_qna_data(
        self, method, bert_type, dataset_types,
    ):
        """Preprocess the en_<type>.json files in place and, when training,
        also emit featured (token-id) datasets.

        :param method: text preprocessing method name
        :param bert_type: tokenizer/bert variant; part of the output keys
        :param dataset_types: iterable of dataset types ("train", ...)
        """
        for dataset_type in dataset_types:
            data_file = "qna_data/en_{}.json".format(dataset_type)

            # Init features columns
            if self.for_train:
                features_columns = {
                    "id": [],
                    "question": [],
                    "text": [],
                    "label": [],
                    "pid": [],
                }

            json_samples = read_json_data(data_file)

            for json_sample in json_samples:
                if self.for_train:
                    features_columns["id"].append(json_sample["id"])
                    features_columns["label"].append(1 if json_sample["label"] else 0)
                    features_columns["pid"].append(json_sample["pid"])

                for key in ["question", "text"]:
                    # e.g. "<method>_<bert_type>_question"
                    pre_key = "{}_{}_{}".format(
                        method, bert_type, key
                    )
                    pre_text, tokens_id = self.pre_process_text(
                        json_sample[key], method, self.for_train
                    )
                    # The preprocessed text is written back onto the sample
                    # so the json file accumulates all preprocessed keys.
                    json_sample[pre_key] = pre_text

                    if self.for_train:
                        features_columns[key].append(tokens_id)

            # samples with preprocessed keys
            write_json_data(data_file, json_samples)
            print ("{}. Length {}. Done write to file {}".format(
                dataset_type, len(json_samples), data_file
            ))

            # generate featured dataset
            if self.for_train:
                folder_name = "{}_{}".format(method, bert_type)
                self.write_features_columns(
                    features_columns, folder_name, dataset_type
                )
def gen_bar():
    """Plot, per aspect, the share of positive (green, upward) and negative
    (red, downward) sentiment across citation-count buckets of width 50."""
    data = read_json_data(DATA_PATH)
    cite_count = []
    sentiment_count_pos = {}
    sentiment_count_neg = {}
    sentiment_count_all_pos = {}
    sentiment_count_all_neg = {}
    for aim in aim_list:
        sentiment_count_pos[aim] = []
        sentiment_count_neg[aim] = []
        sentiment_count_all_neg[aim] = []
        sentiment_count_all_pos[aim] = []
    upper_limit = 500
    interval = 50
    button_limit = 0
    # Bucket labels: 0, 50, ..., 500.
    name_list = range(0, upper_limit + interval, interval)
    # One zeroed counter per bucket, per aspect.
    for _ in range((upper_limit // interval) + 1):
        cite_count.append(0)
        for aim in aim_list:
            sentiment_count_pos[aim].append(0)
            sentiment_count_neg[aim].append(0)
            sentiment_count_all_neg[aim].append(0)
            sentiment_count_all_pos[aim].append(0)

    for key, val in data.items():
        # Only papers whose citation count falls inside the plotted range.
        if val['citation'] > upper_limit or val['citation'] < button_limit:
            continue
        for aim in aim_list:
            # Negative review value presumably marks "no sentiment" — skipped.
            if val['reviews'][aim] < 0:
                continue
            if val['reviews'][aim] > 0:
                sentiment_count_pos[aim][val['citation'] // interval] += 1
            else:
                sentiment_count_neg[aim][val['citation'] // interval] += 1
    # Normalize to fractions of the bucket; negatives are negated so they
    # render below the x axis.
    for aim in aim_list:
        for _ in range((upper_limit // interval) + 1):
            if sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_] != 0:
                sentiment_count_all_pos[aim][_] = sentiment_count_pos[aim][_] / (
                        sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_])
            if sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_] != 0:
                sentiment_count_all_neg[aim][_] = -sentiment_count_neg[aim][_] / (
                        sentiment_count_pos[aim][_] + sentiment_count_neg[aim][_])

    for aim in aim_list:
        plt.bar(range(upper_limit // interval + 1), sentiment_count_all_pos[aim], fc='g', tick_label=name_list)
        plt.bar(range(upper_limit // interval + 1), sentiment_count_all_neg[aim], fc='r', tick_label=name_list)
        plt.title(aim)
        plt.show()
Example #24
0
    def __init__(self,
                 lang,
                 data_folder,
                 model_type,
                 lr,
                 current_epoch=0,
                 input_feature_cols=None):
        """Set up the model, optimizer and checkpoint/report folders.

        :param lang: "en" loads a BERT model path; other values use none
        :param data_folder: preprocessed-data folder name
        :param model_type: model architecture identifier
        :param lr: Adam learning rate
        :param current_epoch: resume epoch; when > 0, the previous epoch's
            weights are loaded and a report folder is created
        :param input_feature_cols: names of the input feature columns;
            defaults to ["input_features"]
        """
        # Avoid the shared-mutable-default pitfall: build the default list
        # per instance instead of once at function-definition time.
        if input_feature_cols is None:
            input_feature_cols = ["input_features"]

        self.lang = lang
        self.data_folder = data_folder
        self.model_type = model_type
        self.current_epoch = current_epoch
        self.input_feature_cols = input_feature_cols

        # bert model path for en
        if lang == "en":
            bert_model_path, _, _ = get_bert_paths(data_folder.split("_")[-1])
        else:
            bert_model_path = None

        # combine from data_folder and model_type
        save_folder = "{}_{}".format(model_type, data_folder)
        self.checkpoint_path = "models/{}".format(save_folder)
        create_folder(self.checkpoint_path)

        saved_epoch_path = None
        if current_epoch > 0:
            saved_epoch = current_epoch - 1
            saved_epoch_path = "{}/{}/{}".format(self.checkpoint_path,
                                                 saved_epoch, saved_epoch)

            # create report folder (only set when resuming, as before)
            self.report_folder = "reports/{}/{}".format(
                save_folder, saved_epoch)
            create_folder(self.report_folder)

        # get configs
        configs_file = "qna_data/pre_data/{}/configs.json".format(data_folder)
        configs = read_json_data(configs_file)

        self.model = get_model(lang=lang,
                               model_type=model_type,
                               bert_model_path=bert_model_path,
                               saved_epoch_path=saved_epoch_path,
                               configs=configs)

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
Example #25
0
def process_old_sentiment_data():
	"""Collapse per-review sentiment lists into one sentiment per aspect."""
	data_path = os.path.join(RESULT_DIR, 'old_all_info.json')
	result_path = os.path.join(RESULT_DIR, 'processed_old_all_info.json')
	data = read_json_data(data_path)
	res = {}
	for key, val in data.items():
		# Map each raw sentiment to an emotion code, then reduce the
		# per-aspect list to a single final sentiment.
		reviews = {
			aspect: get_final_sentiment(
				[get_sentiment(sentiment) for sentiment in sentiments])
			for aspect, sentiments in val['sentiment'].items()
		}
		res[key] = {'citation': val['citation'], 'reviews': reviews}
	with open(result_path, 'w') as f:
		json.dump(res, f)
Example #26
0
    def _build_vocab(self, vocab_file, method, cased):
        """Build a VocabEntry from the preprocessed vi train/test texts
        and save it to ``vocab_file``."""
        corpus = []
        for dataset_type in ["train", "test"]:
            samples = read_json_data(
                "qna_data/vi_{}.json".format(dataset_type))
            for sample in samples:
                for key in ["question", "text"]:
                    # Preprocessed key, e.g. "<method>_<cased>_question".
                    pre_key = "{}_{}_{}".format(method, cased, key)
                    corpus.append(sample[pre_key].split())

        self.vocab = VocabEntry.from_corpus(corpus, freq_cutoff=3)
        self.vocab.save_json(vocab_file)
        print("Save vocab to file {}".format(vocab_file))
Example #27
0
def activities():
    """Render the activities page from ``activities.json``.

    An earlier seminar-file-upload implementation (and an external
    redirect) lived here as disabled code; both were dead and have been
    removed.
    """
    return render_template('activities.html',
                           activities=read_json_data('activities.json'))
Example #28
0
def _squad_to_examples(squad_json_file, start_id=0):
    """Flatten a SQuAD v2.0 json into parallel lists of ids, questions,
    sentences and entailment labels, numbering from ``start_id``."""
    ids, questions, sentences, labels = [], [], [], []

    squad_json = read_json_data(squad_json_file)
    for article in squad_json["data"]:
        for paragraph in article["paragraphs"]:
            context = paragraph["context"]
            for qa in paragraph["qas"]:
                ids.append(start_id)
                start_id += 1
                questions.append(_pre_process_question(qa["question"]))
                sentences.append(_pre_process_sentence(context))
                labels.append(
                    "not_entailment" if qa["is_impossible"] else "entailment")

    return ids, questions, sentences, labels
Example #29
0
def activities():
    """Render the activities page.

    Dead code removed: a disabled redirect to an external seminar site and
    a commented-out POST/upload handler that was never reachable.
    """
    activities_data = read_json_data('activities.json')
    return render_template('activities.html', activities=activities_data)
Example #30
0
def get_titles():
    """Merge the titles found in the vi_train/vi_test json samples into
    qna_data/wiki_data/titles.txt, preserving order and uniqueness."""
    dataset_types = ["train", "test"]
    titles_file = "qna_data/wiki_data/titles.txt"

    with open(titles_file, "r", encoding="utf-8") as f:
        # Drop the trailing empty entry produced by the final newline.
        titles = f.read().split("\n")[:-1]
        print("Number of titles before: ", len(titles))

    for dataset_type in dataset_types:
        json_samples = read_json_data(
            "qna_data/vi_{}.json".format(dataset_type))

        for json_sample in json_samples:
            # NOTE(review): the replace() below swaps one space-like char
            # for a plain space — likely a non-breaking space in the raw
            # data; confirm against the source encoding before changing.
            title = json_sample["title"].strip().replace(" ", " ")
            if title and title not in titles:
                titles.append(title)

    print("Number of titles after: ", len(titles))
    with open(titles_file, "w", encoding="utf-8") as f:
        for title in titles:
            f.write("{}\n".format(title))
Example #31
0
def convert_data(dataset_type, include_txt=True):
    """Flatten a raw dataset file into per-paragraph samples and write
    qna_data/vi_<dataset_type>.json (plus raw translation files).

    :param dataset_type: "train"/"squad" (already flat, gets pid "p1") or
        "test"/"private"/"ltest" (one question with several paragraphs)
    :param include_txt: write the raw text files for translation; the
        original accepted but ignored this flag — it is now honored
        (default True preserves previous behavior)
    """
    file_path = "qna_data/{}.json".format(dataset_type)
    data_json = read_json_data(file_path)

    converted_samples = []
    for sample_json in data_json:
        if dataset_type in ["train", "squad"]:
            sample_json["pid"] = "p1"
            converted_samples.append(sample_json)

        elif dataset_type in ["test", "private", "ltest"]:
            for p in sample_json["paragraphs"]:
                converted_samples.append({
                    "id": sample_json["__id__"],
                    "title": sample_json["title"],
                    "question": sample_json["question"],
                    "text": p["text"],
                    # A missing 'label' key means a negative sample.
                    "label": p.get('label') == '1',
                    "pid": p["id"]
                })

    new_file_path = "qna_data/vi_{}.json".format(dataset_type)

    write_json_data(new_file_path, converted_samples)
    print("Length {}. Done write to file {}".format(
        len(converted_samples), new_file_path))

    if include_txt:
        write_txt_for_translation(converted_samples, dataset_type)  # vi files only
        print("Done write raw files for translation")
Example #32
0
def convert_raw_en_to_json(dataset_type):
    """Rebuild vi_<dataset_type>.json from back-translated raw text files.

    The id/type file pairs each raw line with its role ("question" or
    "text"); titles are copied, in text order, from the original
    (non-back-translated) json file.
    """
    raw_id_type_file = "qna_data/back_tran/raw_id_type_{}.txt".format(dataset_type)
    raw_en_file = "qna_data/back_tran/raw_vi_{}.txt".format(dataset_type)
    en_file = "qna_data/vi_{}.json".format(dataset_type)
    # for getting the title only
    vi_json_file = "qna_data/vi_{}.json".format(dataset_type[1:])
    vi_json_samples = read_json_data(vi_json_file)

    # Read both raw files via context managers — the original opened them
    # inside list comprehensions and never closed the handles.
    with open(raw_id_type_file, "r", encoding="utf-8") as f:
        id_lines = [line.strip() for line in f]
    with open(raw_en_file, "r", encoding="utf-8") as f:
        en_lines = [line.strip() for line in f]

    en_json_samples = []
    current_question = None
    text_idx = 0
    for i, id_line in enumerate(id_lines):
        parts = id_line.split("\t")

        if parts[1] == "question":
            current_question = {
                "id": parts[0],
                "question": en_lines[i],
            }

        elif parts[1] == "text":
            # Each text line becomes one sample tied to the last question.
            en_json_sample = copy.deepcopy(current_question)
            en_json_sample["title"] = vi_json_samples[text_idx]["title"]
            en_json_sample["text"] = en_lines[i]
            en_json_sample["label"] = parts[3] == "True"
            en_json_sample["pid"] = parts[2]

            en_json_samples.append(en_json_sample)
            text_idx += 1

    write_json_data(en_file, en_json_samples)
    print("{}. Length {}. Done write to file {}".format(
        dataset_type, len(en_json_samples), en_file
    ))
Example #33
0
def alumni():
    """Render the alumni page."""
    alumni_entries = read_json_data('alumni.json')
    return render_template('alumni.html', alumni=alumni_entries)
Example #34
0
def admission():
    """Render the admission page."""
    return render_template(
        'admission.html',
        admission=read_json_data('admission.json'))
Example #35
0
def software():
    """Render the software page."""
    software_data = read_json_data('software.json')
    return render_template('software.html', software=software_data)
Example #36
0
def topics():
    """Render the topics page with the shared menu."""
    return render_template(
        'topics.html',
        menus=MENUS,
        topics=read_json_data('topics.json'))
Example #37
0
def members():
    """Render the members page with current members and both alumni lists."""
    context = {
        'menus': MENUS,
        'members': read_json_data('members.json'),
        'alumni_phd': read_csv_data('alumni_phd.csv'),
        'alumni_ms': read_csv_data('alumni_ms.tsv', sep='\t'),
    }
    return render_template('members.html', **context)
Example #38
0
def courses():
    """Render the courses page."""
    return render_template(
        'courses.html', courses=read_json_data('courses.json'))
Example #39
0
def faq():
    """Render the FAQ page with the shared menu."""
    faq_data = read_json_data('faq.json')
    return render_template('faq.html', menus=MENUS, faq=faq_data)
Example #40
0
def sponsors():
    """Render the sponsors page (the template expects them as ``topics``)."""
    return render_template(
        'sponsors.html', topics=read_json_data('sponsors.json'))
Example #41
0
def datamining():
    """Render the data-mining page."""
    data = read_json_data('datamining.json')
    return render_template('datamining.html', datamining=data)
Example #42
0
def students():
    """Render the students page grouped by degree-program category."""
    headers = [u'박사 수료', u'박사 과정', u'석사 과정', u'휴학생']
    member_keys = ['phd_candidates', 'phd_students', 'ms_students', 'on_leave']
    return render_template(
        'students.html',
        students=read_json_data('members.json'),
        member_header_key_pairs=zip(headers, member_keys))
Example #43
0
def phd():
    """Render the PhD degrees page."""
    degrees = read_json_data('degrees.json')
    return render_template('degrees_phd.html', phd=degrees)
Example #44
0
def education():
    """Render the education page."""
    return render_template(
        'education.html', educations=read_json_data('education.json'))
Example #45
0
def members():
    """Render the members page with members and alumni data."""
    members_data = read_json_data('members.json')
    alumni_data = read_json_data('alumni.json')
    return render_template('members.html',
                           members=members_data,
                           alumni=alumni_data)
Example #46
0
def faq():
    """Render the FAQ page; log the matched view arguments for debugging."""
    # Python 3 print function — the original used the Python 2 print
    # statement, which is a SyntaxError on Python 3.
    print(request.view_args)
    return render_template('faq.html',
                           faq=read_json_data('faq.json'))
Example #47
0
def admission():
    """Render the degrees Q&A page in the admission context."""
    return render_template(
        'degrees_qna.html', admission=read_json_data('degrees.json'))
Example #48
0
def masters():
    """Render the Master's degrees page."""
    degrees = read_json_data('degrees.json')
    return render_template('degrees_masters.html', masters=degrees)