Beispiel #1
0
def go_test(update, context):
    """Telegram conversation step for a running quiz.

    On first contact the message text names the test to load; afterwards it
    is treated as the user's answer.  After 10 questions the score is shown
    and the conversation ends.

    Returns ConversationHandler.END when the test is finished, otherwise 1
    (stay in this conversation state).
    """
    user_id = update.message.from_user.id  # renamed: `id` shadows the builtin

    if user_id not in test_dict:  # `.keys()` was redundant for membership tests
        # First message: the text is the test name — load its questions.
        path = 'all_comands/tests/' + update.message.text + '.json'
        test_dict[user_id] = Test(read_json(path))
    else:
        # Subsequent messages are answers to the current question.
        test_dict[user_id].check_it(update.message.text)

    current = test_dict[user_id]
    text = current.ask_next()

    if text['count'] == 10:
        count = current.get_result()

        user = session.query(User).filter(User.id == user_id).first()
        res = TEXT[user.mode]['test']

        update.message.reply_text(f'{res[1]} {count}/10\n{res[2]}',
                                  reply_markup=ReplyKeyboardRemove())
        del test_dict[user_id]

        return ConversationHandler.END

    # Build the reply keyboard: answer options (when present) plus a cancel
    # button; the reply itself is identical in both cases, so send it once.
    if text['markup'] is not None:
        markup = ReplyKeyboardMarkup([text['markup'] + ['❌']],
                                     one_time_keyboard=True)
    else:
        markup = ReplyKeyboardMarkup([['❌']], one_time_keyboard=True)
    update.message.reply_text(text['text'], reply_markup=markup)

    return 1
Beispiel #2
0
def _print_top(ordered, limit=30, show_first_value=False):
    """Print up to ``limit`` + 1 keys of ``ordered`` as 'key, ' on one line.

    Mirrors the original inline loops exactly: the (limit+1)-th key is still
    printed before the trailing newline and the break; if the mapping has
    fewer entries, no trailing newline is emitted.  When
    ``show_first_value`` is true, the first key is additionally printed on
    its own line together with its value (used for the top LLR bigram).
    """
    count = 0
    for key in ordered:
        count += 1
        if show_first_value and count == 1:
            print(key + ' ' + str(ordered[key]))
        print(key + ', ', end='')
        if count > limit:
            print('')
            break


def main():
    """Tokenize all judgment JSON files, then rank bigrams by PMI and LLR.

    Side effects: consumes and resets the module-level ``words`` list and
    prints the top-ranked bigrams for each measure.
    """
    global words
    #tests()
    path_to_json = 'judgments'
    json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
    for jfile in json_files:
        tokenize(reader.read_json(path_to_json + '/' + jfile))
    unigrams = Counter(words)
    bigrams = get_special_list()
    words = []  # release the token list; only the counts are needed from here on
    print(len(bigrams))

    bigrams_pmi = calculate_pmi(unigrams, bigrams)
    bigrams_pmi = OrderedDict(sorted(bigrams_pmi.items(), key=lambda x: x[1], reverse=True))
    _print_top(bigrams_pmi)

    bigrams_llr = calculate_llr(bigrams, unigrams, sum(bigrams.values()))
    bigrams_llr = OrderedDict(sorted(bigrams_llr.items(), key=lambda x: x[1], reverse=True))
    _print_top(bigrams_llr, show_first_value=True)
Beispiel #3
0
def main():
    """Build a frequency list from judgment JSONs, plot its Zipf curve, and
    spell-check the 30 most frequent words missing from the dictionary.

    Side effects: shows a matplotlib window and prints counts plus the
    corrections.
    """
    #tests()
    path_to_json = 'C:/Users/Professional/Desktop/pjn/data/json'
    json_files = [
        pos_json for pos_json in os.listdir(path_to_json)
        if pos_json.endswith('.json')
    ]
    for jfile in json_files:
        tokenize(reader.read_json(path_to_json + '/' + jfile))

    frequency_list = Counter(words)
    sorted_list = sorted(frequency_list.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    ready_list = remove(sorted_list)
    positions = list(range(len(ready_list)))
    wds, values = zip(*ready_list)

    # Zipf-style log-log plot of rank vs. occurrence count.
    pl.loglog(positions, values)
    pl.title('positions and number of words occurrences')
    pl.xlabel('position')
    pl.ylabel('occurences')
    pl.show()
    print(len(wds))

    path = 'C:/Users/Professional/Desktop/pjn/polimorfologik-2.1/polimorfologik-2.1.txt'
    dparser.parse_dict(path)
    print(len(dparser.pdict.values()))
    dvals = list(itertools.chain.from_iterable(dparser.pdict.values()))
    # NOTE(review): set difference discards ordering, so which 30 words are
    # chosen below is nondeterministic across runs — confirm if intended.
    not_in_dict = list(set(wds) - set(dvals))
    print(len(not_in_dict))

    # Slice replaces the manual append loop and also tolerates < 30 candidates
    # (the original range(0, 30) loop would raise IndexError).
    chosen_words = not_in_dict[:30]
    corrected = levenshtein(ready_list, chosen_words)
    for word, suggestions in zip(chosen_words, corrected):
        print(word + ' ' + suggestions[0])
Beispiel #4
0
def main():
    """Run find_szkoda over every judgment JSON file in the data directory."""
    json_dir = 'C:/Users/Professional/Desktop/pjn/data/json'
    for name in os.listdir(json_dir):
        if name.endswith('.json'):
            find_szkoda(reader.read_json(json_dir + '/' + name))
Beispiel #5
0
def init_es():
    """Recreate the 'lab' Elasticsearch index and load every judgment into it."""
    # Drop any previous index first; ignore "missing index" style responses.
    es.indices.delete(index='lab', ignore=[400, 404])
    res = es.indices.create(index='lab', body=index_body, ignore=400)
    print(res)

    json_dir = 'C:/Users/Professional/Desktop/pjn/data/json'
    for name in os.listdir(json_dir):
        if name.endswith('.json'):
            save_in_es(reader.read_json(json_dir + '/' + name))
Beispiel #6
0
def main():
    """Load all judgment files, sort them by date, and process the top 100.

    Side effects: fills the module-level ``top_judgments`` with the first
    100 entries of the sorted ``all_jugdments`` list.
    """
    global top_judgments
    judgments_dir = 'judgments'
    for name in os.listdir(judgments_dir):
        if name.endswith('.json'):
            load_judgments(reader.read_json(judgments_dir + '/' + name))
    sort_by_date()
    # NOTE: persisting the top judgments back to disk is currently disabled.
    top_judgments = all_jugdments[:100]
    process_top_judgments()
Beispiel #7
0
def read(path, in_format=None, encoding='utf-8', csv_delimiter=','):
    """Read a dataframe from a given path with a given format."""
    fmt = in_format if in_format else infer_format(path)

    if fmt == Format.csv:
        return pandas.read_csv(
            path, delimiter=csv_delimiter, encoding=encoding)
    if fmt == Format.json:
        json_object = reader.read_json(path, encoding=encoding)
        return pandas.DataFrame.from_dict(json_object)
    if fmt in {Format.xls, Format.xlsx}:
        return pandas.read_excel(path, encoding=encoding)

    raise ValueError("Invalid file extension %s" % fmt)
Beispiel #8
0
import requests
import os
import reader

url = 'http://localhost:9200'
count = 0


def tag_judgment(data):
    """POST each 2008 judgment's text to the local tagger service and append
    the tagger's response to 'tagged.txt'.

    Increments the module-level ``count`` once per judgment sent.
    """
    global count
    for text in data['items']:
        if not reader.in_2008(text):
            continue
        count += 1
        response = requests.post(url, text['textContent'].encode('utf-8'))
        # Bug fix: writelines() on a plain string iterates it character by
        # character; write() is the correct call and produces the same file
        # content in a single operation.
        with open('tagged.txt', 'a') as f:
            f.write(response.text)


# Feed every judgment JSON file in the directory to the tagger, then report
# how many judgments were sent.
path_to_json = 'judgments'
json_files = [f for f in os.listdir(path_to_json) if f.endswith('.json')]
for json_name in json_files:
    tag_judgment(reader.read_json(path_to_json + '/' + json_name))

print(count)
Beispiel #9
0
import reader

# Token stream to parse, plus the precomputed parse data loaded from JSON.
tokens = reader.read_file("caspalc/tokens")
parser = reader.read_json("parse_table.json")
productions = parser["productions"]  # index -> right-hand side (list of symbols); ["ERR"] marks an error entry
table = parser["table"]  # nonterminal -> terminal -> index into productions


def check(tokens):
    """Table-driven predictive parse check of ``tokens``.

    Uses a stack seeded with the end marker "$" and start symbol "<S>":
    a matching terminal on top consumes a token; otherwise the parse table
    selects a production for the top nonterminal.  Returns False on a
    syntax error or premature end of input.

    NOTE(review): as shown, the successful table-lookup branch never
    modifies the stack (``subs`` is computed but not pushed), so the loop
    cannot make progress there — this snippet appears to be truncated.
    """
    stack = ["$", "<S>"]
    i = 0
    current_word = get_token_info(tokens[i])

    while stack != ["$"]:
        # Ran out of tokens while unparsed symbols remain.
        if i >= len(tokens) and stack != ["$"]:
            print("ERROR: Unexpected end of file.")
            return False

        print(stack, tokens[i])
        if stack[-1] == current_word:
            # Terminal match: consume the token and pop it off the stack.
            i += 1
            if i < len(tokens):
                current_word = get_token_info(tokens[i])
            del stack[-1]
        else:
            # Nonterminal on top: look up which production to expand.
            top = stack[-1]
            subs = productions[table[top][current_word]]

            if subs == ["ERR"]:
                print("ERROR: SYNTAX ERROR NEAR " + current_word)
                return False
Beispiel #10
0
import reader

# Token stream to parse, plus the precomputed parse data loaded from JSON.
tokens = reader.read_file("caspalc/tokens")
parser = reader.read_json("parse_table.json")
productions = parser["productions"]  # index -> right-hand side (list of symbols); ["ERR"] marks an error entry
table = parser["table"]  # nonterminal -> terminal -> index into productions


def check(tokens):
    """Table-driven predictive parse check of ``tokens``.

    Uses a stack seeded with the end marker '$' and start symbol '<S>':
    a matching terminal on top consumes a token; otherwise the parse table
    selects a production for the top nonterminal.  Returns False on a
    syntax error or premature end of input.

    NOTE(review): as shown, the successful table-lookup branch never
    modifies the stack (``subs`` is computed but not pushed), so the loop
    cannot make progress there — this snippet appears to be truncated.
    """
    stack = ['$', '<S>']
    i = 0
    current_word = get_token_info(tokens[i])

    while stack != ['$']:
        # Ran out of tokens while unparsed symbols remain.
        if (i >= len(tokens) and stack != ['$']):
            print("ERROR: Unexpected end of file.")
            return False

        print(stack, tokens[i])
        if stack[-1] == current_word:
            # Terminal match: consume the token and pop it off the stack.
            i += 1
            if i < len(tokens):
                current_word = get_token_info(tokens[i])
            del stack[-1]
        else:
            # Nonterminal on top: look up which production to expand.
            top = stack[-1]
            subs = productions[table[top][current_word]]

            if subs == ["ERR"]:
                print("ERROR: SYNTAX ERROR NEAR " + current_word)
                return False