from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove
from telegram.ext import ConversationHandler

# test_dict, Test, read_json, session, User and TEXT are project-local.

def go_test(update, context):
    """Conversation step for a quiz: load a test on the first message,
    check answers afterwards, and finish after 10 questions."""
    id = update.message.from_user.id
    if id not in test_dict:
        # first message of the conversation selects the test file
        path = 'all_comands/tests/' + update.message.text + '.json'
        test_dict[id] = Test(read_json(path))
    else:
        # subsequent messages are answers to the previous question
        test_dict[id].check_it(update.message.text)
    current = test_dict[id]
    text = current.ask_next()
    if text['count'] == 10:
        # test finished: report the score and leave the conversation
        count = current.get_result()
        user = session.query(User).filter(User.id == id).first()
        res = TEXT[user.mode]['test']
        update.message.reply_text(f'{res[1]} {count}/10\n{res[2]}',
                                  reply_markup=ReplyKeyboardRemove())
        del test_dict[id]
        return ConversationHandler.END
    elif text['markup'] is not None:
        markup = ReplyKeyboardMarkup([text['markup'] + ['❌']], one_time_keyboard=True)
        update.message.reply_text(text['text'], reply_markup=markup)
    else:
        markup = ReplyKeyboardMarkup([['❌']], one_time_keyboard=True)
        update.message.reply_text(text['text'], reply_markup=markup)
    return 1
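# A minimal sketch of the Test interface that go_test relies on; check_it,
# ask_next and get_result are the names used above, everything else (field
# names, the 'count' protocol) is an assumption for illustration only.
class Test:
    def __init__(self, data):
        self.questions = data   # parsed JSON: assumed list of question dicts
        self.asked = 0          # questions handed out so far
        self.score = 0          # correct answers

    def check_it(self, answer):
        # assumed: compare the reply with the stored answer for the last question
        if answer == self.questions[self.asked - 1].get('answer'):
            self.score += 1

    def ask_next(self):
        # 'count' reports answered questions, so go_test sees 10 only after
        # the final answer has been checked
        if self.asked == len(self.questions):
            return {'count': self.asked, 'text': '', 'markup': None}
        question = self.questions[self.asked]
        self.asked += 1
        return {'count': self.asked - 1,
                'text': question.get('question', ''),
                'markup': question.get('options')}

    def get_result(self):
        return self.score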
import os
from collections import Counter, OrderedDict

# tokenize, get_special_list, calculate_pmi, calculate_llr, words and reader
# are project-local.

def main():
    global words
    # tests()
    path_to_json = 'judgments'
    json_files = [pos_json for pos_json in os.listdir(path_to_json)
                  if pos_json.endswith('.json')]
    for jfile in json_files:
        tokenize(reader.read_json(path_to_json + '/' + jfile))
    unigrams = Counter(words)
    bigrams = get_special_list()
    words = []
    print(len(bigrams))

    # rank bigrams by pointwise mutual information
    bigrams_pmi = calculate_pmi(unigrams, bigrams)
    bigrams_pmi = OrderedDict(sorted(bigrams_pmi.items(), key=lambda x: x[1], reverse=True))
    count = 0
    for e in bigrams_pmi:
        count += 1
        print(e + ', ', end='')
        if count > 30:
            print('')
            break

    # rank bigrams by log-likelihood ratio
    bigrams_llr = calculate_llr(bigrams, unigrams, sum(bigrams.values()))
    bigrams_llr = OrderedDict(sorted(bigrams_llr.items(), key=lambda x: x[1], reverse=True))
    count = 0
    for e in bigrams_llr:
        count += 1
        if count == 1:
            print(e + ' ' + str(bigrams_llr[e]))
        print(e + ', ', end='')
        if count > 30:
            print('')
            break
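# A possible shape for calculate_pmi, for reference: pointwise mutual
# information of a bigram (a, b) is log2(p(a, b) / (p(a) * p(b))). Bigram
# keys are assumed to be 'a b' strings, matching the printing above; the
# real project function may differ.
import math

def calculate_pmi(unigrams, bigrams):
    total_uni = sum(unigrams.values())
    total_bi = sum(bigrams.values())
    pmi = {}
    for bigram, freq in bigrams.items():
        a, b = bigram.split()
        p_ab = freq / total_bi
        p_a = unigrams[a] / total_uni
        p_b = unigrams[b] / total_uni
        pmi[bigram] = math.log2(p_ab / (p_a * p_b))
    return pmi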
import itertools
import operator
import os
from collections import Counter

import matplotlib.pyplot as pl

# tokenize, remove, levenshtein, words, reader and dparser are project-local.

def main():
    # tests()
    path_to_json = 'C:/Users/Professional/Desktop/pjn/data/json'
    json_files = [pos_json for pos_json in os.listdir(path_to_json)
                  if pos_json.endswith('.json')]
    for jfile in json_files:
        tokenize(reader.read_json(path_to_json + '/' + jfile))

    # build a frequency list and plot it on a log-log scale (Zipf's law)
    frequency_list = Counter(words)
    sorted_list = sorted(frequency_list.items(), key=operator.itemgetter(1), reverse=True)
    ready_list = remove(sorted_list)
    positions = list(range(0, len(ready_list)))
    wds, values = zip(*ready_list)
    pl.loglog(positions, values)
    pl.title('positions and number of word occurrences')
    pl.xlabel('position')
    pl.ylabel('occurrences')
    pl.show()
    print(len(wds))

    # words absent from the Polimorfologik dictionary are spelling candidates
    path = 'C:/Users/Professional/Desktop/pjn/polimorfologik-2.1/polimorfologik-2.1.txt'
    dparser.parse_dict(path)
    print(len(dparser.pdict.values()))
    dvals = list(itertools.chain.from_iterable(dparser.pdict.values()))
    not_in_dict = list(set(wds) - set(dvals))
    print(len(not_in_dict))
    chosen_words = not_in_dict[:30]
    corrected = levenshtein(ready_list, chosen_words)
    for i in range(len(chosen_words)):
        print(chosen_words[i] + ' ' + corrected[i][0])
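# The levenshtein helper above is project-local; a standard edit-distance
# routine it could build on looks like this (two-row dynamic programming;
# a generic sketch, not necessarily the original implementation).
def edit_distance(a, b):
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        current = [i]
        for j, cb in enumerate(b, start=1):
            current.append(min(previous[j] + 1,                 # deletion
                               current[j - 1] + 1,              # insertion
                               previous[j - 1] + (ca != cb)))   # substitution
        previous = current
    return previous[-1]

# edit_distance('kitten', 'sitting') -> 3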
import os

# find_szkoda and reader are project-local.

def main():
    path_to_json = 'C:/Users/Professional/Desktop/pjn/data/json'
    json_files = [pos_json for pos_json in os.listdir(path_to_json)
                  if pos_json.endswith('.json')]
    for jfile in json_files:
        find_szkoda(reader.read_json(path_to_json + '/' + jfile))
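# Going by its name, find_szkoda presumably looks for inflected forms of the
# word "szkoda" in the judgment texts. A minimal regex sketch under that
# assumption; 'items' and 'textContent' mirror the judgment JSON layout used
# in tag_judgment below, and the pattern is illustrative, not the original.
import re

SZKODA_RE = re.compile(r'\bszkod\w*', re.IGNORECASE)

def find_szkoda(data):
    for judgment in data['items']:
        matches = SZKODA_RE.findall(judgment['textContent'])
        if matches:
            print(len(matches), matches[:5])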
import os

# es, index_body, save_in_es and reader are project-local.

def init_es():
    # recreate the index from scratch; ignore "not found" on delete and
    # "already exists" on create
    es.indices.delete(index='lab', ignore=[400, 404])
    res = es.indices.create(index='lab', body=index_body, ignore=400)
    print(res)
    path_to_json = 'C:/Users/Professional/Desktop/pjn/data/json'
    json_files = [pos_json for pos_json in os.listdir(path_to_json)
                  if pos_json.endswith('.json')]
    for jfile in json_files:
        save_in_es(reader.read_json(path_to_json + '/' + jfile))
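# index_body is defined elsewhere in the project; an index for judgment
# documents would typically carry analysis settings plus a text-field
# mapping, roughly like this (illustrative ES 7+ style; field and analyzer
# names here are assumptions, not the original configuration):
index_body = {
    'settings': {
        'analysis': {
            'analyzer': {
                'judgment_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': ['lowercase'],
                }
            }
        }
    },
    'mappings': {
        'properties': {
            'textContent': {'type': 'text', 'analyzer': 'judgment_analyzer'},
            'judgmentDate': {'type': 'date'},
        }
    },
}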
import os

# load_judgments, sort_by_date, process_top_judgments, all_judgments and
# reader are project-local.

def main():
    global top_judgments
    # load jsons into the global all_judgments list
    path_to_json = 'judgments'
    json_files = [pos_json for pos_json in os.listdir(path_to_json)
                  if pos_json.endswith('.json')]
    for jfile in json_files:
        load_judgments(reader.read_json(path_to_json + '/' + jfile))
    # sort by date
    sort_by_date()
    # save
    # for i in range(100):
    #     reader.write_json('top/judgment' + str(i) + '.json', all_judgments[i])
    top_judgments = all_judgments[:100]
    process_top_judgments()
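# sort_by_date is project-local; assuming every judgment dict carries a
# lexicographically sortable date string under 'judgmentDate' (an assumed
# key), a minimal version could be:
def sort_by_date():
    all_judgments.sort(key=lambda j: j['judgmentDate'], reverse=True)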
import pandas

# reader, Format and infer_format are defined in this module.

def read(path, in_format=None, encoding='utf-8', csv_delimiter=','):
    """Read a dataframe from a given path with a given format."""
    if not in_format:
        in_format = infer_format(path)
    dataframe = None
    if in_format == Format.csv:
        dataframe = pandas.read_csv(path, delimiter=csv_delimiter, encoding=encoding)
    elif in_format == Format.json:
        json_object = reader.read_json(path, encoding=encoding)
        dataframe = pandas.DataFrame.from_dict(json_object)
    elif in_format in {Format.xls, Format.xlsx}:
        dataframe = pandas.read_excel(path, encoding=encoding)
    else:
        raise ValueError("Invalid file extension %s" % in_format)
    return dataframe
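# Example usage (paths are hypothetical; Format and infer_format come from
# this module):
df_csv = read('data/table.csv', csv_delimiter=';')        # format inferred
df_json = read('data/records.json')
df_xlsx = read('data/sheet.xlsx', in_format=Format.xlsx)  # format forced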
import os

import requests

import reader

url = 'http://localhost:9200'
count = 0

def tag_judgment(data):
    global count
    for text in data['items']:
        if not reader.in_2008(text):
            continue
        count += 1
        # POST the raw judgment text to the tagger and append its output
        response = requests.post(url, text['textContent'].encode('utf-8'))
        with open('tagged.txt', 'a', encoding='utf-8') as f:
            f.write(response.text)

path_to_json = 'judgments'
json_files = [pos_json for pos_json in os.listdir(path_to_json)
              if pos_json.endswith('.json')]
for jfile in json_files:
    tag_judgment(reader.read_json(path_to_json + '/' + jfile))
print(count)
import reader

tokens = reader.read_file("caspalc/tokens")
parser = reader.read_json("parse_table.json")
productions = parser["productions"]
table = parser["table"]

def check(tokens):
    # table-driven LL(1) parse: the stack holds grammar symbols, "$" marks
    # the bottom, "<S>" is the start symbol
    stack = ["$", "<S>"]
    i = 0
    current_word = get_token_info(tokens[i])
    while stack != ["$"]:
        if i >= len(tokens):
            print("ERROR: Unexpected end of file.")
            return False
        print(stack, tokens[i])
        if stack[-1] == current_word:
            # terminal on top matches the lookahead: consume both
            i += 1
            if i < len(tokens):
                current_word = get_token_info(tokens[i])
            del stack[-1]
        else:
            # nonterminal on top: look up the production to expand
            top = stack[-1]
            subs = productions[table[top][current_word]]
            if subs == ["ERR"]:
                print("ERROR: SYNTAX ERROR NEAR " + current_word)
                return False
            # assumed continuation (the original snippet breaks off here):
            # replace the nonterminal with the production body, pushed in
            # reverse so its leftmost symbol ends up on top; "eps" is an
            # assumed marker for the empty production
            del stack[-1]
            for symbol in reversed(subs):
                if symbol != "eps":
                    stack.append(symbol)
    return True
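# For reference: the loop above indexes productions[table[top][word]], so
# parse_table.json evidently maps each nonterminal to a row of production
# indices, with entries pointing at ["ERR"] marking errors. A tiny
# self-contained example of the same table-driven lookup (the grammar
# symbols here are made up for illustration):
toy_productions = [["ERR"], ["(", "<S>", ")"], ["x"]]
toy_table = {"<S>": {"(": 1, "x": 2}}
print(toy_productions[toy_table["<S>"]["("]])  # -> ['(', '<S>', ')']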