import os
import subprocess

import xmltodict

# Project-local helpers assumed importable elsewhere in this repo:
# FileUtils, WordFilterFactory, TAG_TO_PART_OF_SPEECH,
# convert_part_of_speech, utils.fill_suffix.


def analyze(filename, known_words_filepath, not_known_words_filepath,
            print_example):
    known_words = set(FileUtils.read(known_words_filepath).split())
    not_known_words = set(FileUtils.read(not_known_words_filepath).split())

    # CoreNLP names its output '<input file>.xml' inside -outputDirectory;
    # FileUtils.random_path() is assumed to return a path under /tmp/, so
    # the two paths line up.
    tmp_filepath = FileUtils.random_path()
    output_filepath = tmp_filepath + '.xml'
    FileUtils.copy(filename, tmp_filepath)

    corncob_word_list = os.path.join('data', 'corncob_lowercase.txt')
    word_filter = WordFilterFactory.create_word_filter(corncob_word_list)

    cmd = [
        'java',
        '-cp',
        ':'.join([
            'stanford-corenlp-full/stanford-corenlp-3.3.1.jar',
            'stanford-corenlp-full/stanford-corenlp-3.3.1-models.jar',
            'stanford-corenlp-full/xom.jar',
            'stanford-corenlp-full/joda-time.jar',
            'stanford-corenlp-full/jollyday.jar',
            'stanford-corenlp-full/ejml-0.23.jar',
        ]),
        '-Xmx2g',
        'edu.stanford.nlp.pipeline.StanfordCoreNLP',
        '-annotators', 'tokenize,ssplit,pos,lemma',
        '-file', tmp_filepath,
        '-outputDirectory', '/tmp/',
    ]
    subprocess.call(cmd)

    raw_output = FileUtils.read(output_filepath)
    d = xmltodict.parse(raw_output)
    sentences = d['root']['document']['sentences']['sentence']

    candidate_words = {}

    def word_filter_fun(word, lemma, tag):
        del word, tag  # unused
        return word_filter.isok(lemma)

    def adjective_filter_fun(word, lemma, tag):
        del word, lemma  # unused
        # Skip comparative (JJR) and superlative (JJS) adjective forms.
        return tag not in ('JJR', 'JJS')

    filters = [word_filter_fun, adjective_filter_fun]

    for sentence_dict in sentences:
        tokens = sentence_dict['tokens']['token']
        # xmltodict parses a single-token sentence as a dict rather than a
        # list; skip those instead of special-casing them.
        if not isinstance(tokens, list):
            continue

        # Reconstruct the raw sentence from token character offsets,
        # preserving the original spacing between tokens.
        last_offset = int(tokens[0]['CharacterOffsetBegin'])
        sentence_raw = ''
        for token in tokens:
            word = token['word']
            begin_offset = int(token['CharacterOffsetBegin'])
            sentence_raw += (begin_offset - last_offset) * ' '
            sentence_raw += word
            last_offset = int(token['CharacterOffsetEnd'])

        for token in tokens:
            word = token['word']
            lemma = token['lemma']
            tag = token['POS']
            if tag not in TAG_TO_PART_OF_SPEECH:
                continue
            if all(f(word, lemma, tag) for f in filters):
                candidate_words[(lemma, TAG_TO_PART_OF_SPEECH[tag])] = {
                    'example_sentence': sentence_raw,
                    'word': word,
                }

    # Keep only lemmas the user has not already classified either way.
    not_known = []
    for ((lemma, part_of_speech), d) in candidate_words.iteritems():
        if lemma not in known_words and lemma not in not_known_words:
            not_known.append((lemma, part_of_speech, d))

    for (lemma, part_of_speech, d) in not_known:
        word = d['word']
        example_sentence = d['example_sentence']
        out = '(%s.) %s' % (convert_part_of_speech(part_of_speech), lemma)
        if print_example:
            # Pad to 22 columns; ' # ' then puts the sentence at column 25,
            # which the caret line below reuses to underline the word.
            line = utils.fill_suffix(out, 22, ' ') + ' # ' + example_sentence
            # str.find avoids the regex-metacharacter bug that re.search
            # had when the word contained characters like '(' or '.'.
            match_pos = example_sentence.find(word)
            print line.encode('utf-8')
            print ((match_pos + 25) * ' ') + (len(word) * '^')
        else:
            print out.encode('utf-8')
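# Minimal usage sketch. The input and word-list paths below are
# hypothetical stand-ins for whatever CLI or config normally drives
# analyze(); it assumes the word-list files are plain whitespace-separated
# text and the CoreNLP 3.3.1 jars are unpacked under
# 'stanford-corenlp-full/'.
if __name__ == '__main__':
    analyze(
        filename='article.txt',
        known_words_filepath='data/known_words.txt',
        not_known_words_filepath='data/not_known_words.txt',
        print_example=True,
    )
    # With print_example=True, each unknown lemma prints alongside the
    # sentence it was found in, with a caret line underlining the
    # inflected form, e.g.:
    #
    #   (adj.) obstreperous    # The obstreperous crowd refused to leave.
    #                              ^^^^^^^^^^^^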