コード例 #1
0
def load_all(dataset_dir, doc_ids=None, filter_types=None, filter_senses=None):
    """Load a whole CoNLL16st dataset and return its parts as one tuple.

    Args:
        dataset_dir: Location of the dataset, forwarded to every loader.
        doc_ids: Optional document ids forwarded to ``load_parses``; after
            the parses are loaded it is replaced by the sorted keys of the
            loaded parses.
        filter_types: Forwarded unchanged to ``load_relations_gold``.
        filter_senses: Forwarded unchanged to ``load_relations_gold``.

    Returns:
        The tuple ``(doc_ids, words, word_metas, pos_tags, dependencies,
        parsetrees, rel_ids, rel_parts, rel_types, rel_senses,
        relations_gold)``.
    """
    # --- raw inputs, loaded as provided ---
    parses = load_parses(dataset_dir, doc_ids=doc_ids)
    doc_ids = list(parses.keys())
    doc_ids.sort()
    raws = load_raws(dataset_dir, doc_ids=doc_ids)

    # Both gold-relation loads share everything except `with_senses`.
    gold_kwargs = dict(
        doc_ids=doc_ids,
        filter_types=filter_types,
        filter_senses=filter_senses,
    )
    relations_gold = load_relations_gold(dataset_dir, with_senses=True, **gold_kwargs)
    # Fall back to the sense-less relations only when the load with senses
    # came back empty (falsy).
    relationsnos_gold = relations_gold or load_relations_gold(
        dataset_dir, with_senses=False, **gold_kwargs
    )

    # --- per document id and token id ---
    words = get_words(parses)
    pos_tags = get_pos_tags(parses)
    word_metas = get_word_metas(parses, raws)

    # --- per document id and token id pair ---
    dependencies = get_dependencies(parses)

    # --- per document id ---
    parsetrees = get_parsetrees(parses)

    # --- per relation id ---
    # Relation parts come from the (possibly sense-less) fallback set, while
    # types and senses are always read from the relations loaded with senses.
    rel_parts = get_rel_parts(relationsnos_gold)
    rel_ids = list(rel_parts.keys())
    rel_ids.sort()
    rel_types = get_rel_types(relations_gold)
    rel_senses = get_rel_senses(relations_gold)

    # Return value is ignored; presumably this annotates word_metas in
    # place with the relation types/senses -- confirm in add_relation_tags.
    add_relation_tags(word_metas, rel_types, rel_senses)

    return (
        doc_ids,
        words,
        word_metas,
        pos_tags,
        dependencies,
        parsetrees,
        rel_ids,
        rel_parts,
        rel_types,
        rel_senses,
        relations_gold,
    )
コード例 #2
0
ファイル: lab.py プロジェクト: coreyabshire/stacko
def extract_svm_features(vidx, data):
    """Build a binary bag-of-words feature matrix, one row per body text.

    Args:
        vidx: Mapping from word to column index. Words that are not in it
            are ignored.
        data: Object supporting ``len()`` and exposing a ``BodyMarkdown``
            sequence of texts (NOTE(review): presumably a pandas DataFrame
            -- confirm against callers).

    Returns:
        A ``scipy.sparse.csr_matrix`` with ``len(data)`` rows and
        ``len(vidx)`` columns; entry ``(i, vidx[w])`` is 1.0 when word ``w``
        occurs in the i-th body.
    """
    # LIL is used for incremental assignment; converted to CSR on return.
    x = scipy.sparse.lil_matrix((len(data), len(vidx)))
    # Iterate positionally instead of indexing BodyMarkdown[i]: on a pandas
    # Series, [i] is a label lookup and fails (or picks the wrong row) when
    # the index is not the default 0..n-1, e.g. after filtering or sampling.
    for i, body in enumerate(data.BodyMarkdown):
        for w in get_words(body):
            if w in vidx:
                x[i, vidx[w]] = 1.0
    return scipy.sparse.csr_matrix(x)
コード例 #3
0
ファイル: lab.py プロジェクト: coreyabshire/stacko
def extract_words_from_row(row):
    """Turn one row of data into a flat sequence of words.

    The row's title, body and accepted tags are joined with single spaces
    and the combined text is handed to ``get_words``.

    Args:
        row: Mapping with the keys ``"Title"``, ``"BodyMarkdown"`` and
            ``"Tag1"`` .. ``"Tag5"``.

    Returns:
        Whatever ``get_words`` returns for the combined text.
    """
    title = row["Title"]
    body = row["BodyMarkdown"]
    # Only tag values accepted by `istag` contribute to the text.
    tags = " ".join(filter(istag, (row["Tag%d" % t] for t in range(1, 6))))
    # The previously unused `postid = row["PostId"]` lookup was dropped: the
    # value never reached the returned words.
    text = " ".join([title, body, tags])
    return get_words(text)
コード例 #4
0
ファイル: lab.py プロジェクト: coreyabshire/stacko
def get_svm_word_indices(vidx, text):
    """Encode *text* as a binary bag-of-words vector over the vocabulary.

    Args:
        vidx: Mapping from word to position in the vector. Words that are
            not in it are ignored.
        text: Text to encode; it is split into words by ``get_words``.

    Returns:
        A ``scipy.sparse.csr_matrix`` built from a dense vector of length
        ``len(vidx)`` holding 1.0 at ``vidx[w]`` for every known word ``w``
        in the text and 0.0 elsewhere.
    """
    # `float` is NumPy's double-precision dtype, so this matches the former
    # `dtype=numpy.double` without requiring both the `np` and `numpy`
    # aliases to be imported.
    x = np.zeros(len(vidx), dtype=float)
    for w in get_words(text):
        if w in vidx:
            x[vidx[w]] = 1.0
    return scipy.sparse.csr_matrix(x)
コード例 #5
0
ファイル: main_local.py プロジェクト: ilicv/Cyrilic_OCR
#filename = 'scan\\Jablanica0096_crop.tif'

#filename = 'scan\\paragraph_635.tif'
#filename = 'scan\\problem_italic.tif'
#filename = 'scan\\paragraph_265.tif'
#filename = 'scan\\test1.tif'

#filename = 'scan\\Dubrovnik0018_crop.tif'
#filename = 'scan\\Jablanica0096_crop.tif'
#filename = 'scan\\JuznaSrbija0052_crop.tif'
#filename = 'scan\\Piva0015_crop.tif'
#filename = 'scan\\Proscenje0064_crop.tif'

#filename = 'scan\\Uskoci0080_crop.tif'
#filename = 'scan\\Vasojevici0033_crop.tif'

#filename = 'scan\\line_165.tif'
#filename = 'scan\\line_177.tif'
# Scan currently selected for processing; every stage below except
# clean_all() receives this same path.
filename = 'scan\\Vasojevici0033_crop.tif'

# Pipeline stages run strictly in this order.
# NOTE(review): the stage descriptions are inferred from the function names
# only -- confirm against the cl / wp / el / rl / cmp modules.
cl.clean_all()                    # presumably clears output of earlier runs
wp.get_words(filename)            # presumably segments the scan into words
el.extract_letters(filename)      # presumably segments words into letters

rl.replace_diacritics(filename)   # presumably post-processes diacritics
cmp.compare(filename, 'initial')  # meaning of the 'initial' mode: TODO confirm

# Print the elapsed run time; script_start is expected to be set earlier in
# the script (outside this excerpt).
script_end = datetime.datetime.now()
print(script_end - script_start)
コード例 #6
0
ファイル: hangman.py プロジェクト: asc3ndr/zoom-meetings
import os
import words

def clear():
    """Clear the terminal with the platform's command; return its exit status."""
    return os.system("cls" if os.name == "nt" else "clear")


# Game state: the secret as a list of items, the board shown to the player
# (one "___" placeholder per item), the wrong guesses so far and the number
# of attempts left.
secret_word = list(words.get_words())
hangman_word = ["___"] * len(secret_word)
wrong_guesses = []
num_guesses = 10

# Play until the attempts run out or the board equals the secret.
while num_guesses > 0 and hangman_word != secret_word:

    # Show the board, the remaining attempts and the wrong guesses so far.
    print(" ".join(hangman_word))
    print(f"Du har {num_guesses} forsøk igjen.")
    print("\nDu har brukt: " + " ".join(wrong_guesses))

    guessed_letter = input("Guess a letter: ").lower()

    # A guess that is already on the board or already recorded as wrong is
    # ignored: no penalty, and the screen is not cleared.
    if guessed_letter in hangman_word + wrong_guesses:
        continue

    if guessed_letter in secret_word:
        # Reveal every position holding the guessed item.
        for index, letter in enumerate(secret_word):
            if letter == guessed_letter:
                hangman_word[index] = guessed_letter
    else:
        # A miss is recorded and costs one attempt.
        wrong_guesses.append(guessed_letter)
        num_guesses -= 1

    clear()

if hangman_word == secret_word:
コード例 #7
0
ファイル: funnyEmojis.py プロジェクト: s1611596/Hangman
#See more at https://www.geeksforgeeks.org/python-program-to-print-emojis/
import words
# Print whatever words.get_words() returns, followed by a series of emoji
# written with the different spellings Python offers: \U code-point escapes,
# \N{...} name escapes, and a literal character.
print(words.get_words())

# grinning face
print("\U0001f600")

# grimacing face
print("\N{grimacing face}")

# crying face, written as a literal character
print("😢")

# grinning squinting face
print("\U0001F606")

# rolling on the floor laughing
# (A stray trailing backslash used to follow this call; it only parsed
# because the next line happened to be blank.)
print("\U0001F923")

# face with rolling eyes
print("\N{face with rolling eyes}")

# pleading face
print(u"\U0001F97A")