Example #1
def analyze(filename, known_words_filepath, not_known_words_filepath, print_example):
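    """Print lemmas from `filename` that appear in neither word list.

    Runs Stanford CoreNLP (tokenize, ssplit, pos, lemma) over a copy of
    `filename`, filters the lemmatized tokens, and prints each surviving
    lemma with its part of speech. If `print_example` is true, the sentence
    the word came from is printed as well, with the word marked by carets.
    """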
    known_words = set(FileUtils.read(known_words_filepath).split())
    not_known_words = set(FileUtils.read(not_known_words_filepath).split())
    tmp_filepath = FileUtils.random_path()
    output_filepath = tmp_filepath + '.xml'
    FileUtils.copy(filename, tmp_filepath)

    corncob_word_list = os.path.join('data', 'corncob_lowercase.txt')
    word_filter = WordFilterFactory.create_word_filter(corncob_word_list)

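    # Run the Stanford CoreNLP pipeline over the copied file; CoreNLP writes
    # an XML file named after its input into the -outputDirectory given below.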
    cmd = ['java',
           '-cp',
           'stanford-corenlp-full/stanford-corenlp-3.3.1.jar:stanford-corenlp-full/stanford-corenlp-3.3.1-models.jar:stanford-corenlp-full/xom.jar:stanford-corenlp-full/joda-time.jar:stanford-corenlp-full/jollyday.jar:stanford-corenlp-full/ejml-0.23.jar',
           '-Xmx2g',
           'edu.stanford.nlp.pipeline.StanfordCoreNLP',
           '-annotators',
           'tokenize,ssplit,pos,lemma',
           '-file',
           tmp_filepath,
           '-outputDirectory',
           '/tmp/'
    ]
    subprocess.call(cmd)
    raw_output = FileUtils.read(output_filepath)
    d = xmltodict.parse(raw_output)
    sentences = d['root']['document']['sentences']['sentence']
    # xmltodict returns a single dict (not a list) when there is only one
    # sentence, so normalize to a list before iterating.
    if not isinstance(sentences, list):
        sentences = [sentences]

    candidate_words = defaultdict(dict)

    def word_filter_fun(word, lemma, tag):
        del word
        del tag
        return word_filter.isok(lemma)

    def adjective_filter_fun(word, lemma, tag):
        del word
        del lemma
        # Reject comparative (JJR) and superlative (JJS) adjective forms.
        return tag not in ('JJR', 'JJS')

    filters = [
            word_filter_fun,
            adjective_filter_fun
    ]

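    # Walk every parsed sentence: rebuild its surface text from the token
    # offsets, then collect the lemmas that pass all filters.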
    for sentence_dict in sentences:
        tokens = sentence_dict['tokens']['token']
        if not isinstance(tokens, list):
            continue

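        # Reconstruct the sentence text by padding each token out to its
        # character offset in the original file.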
        last_offset = int(tokens[0]['CharacterOffsetBegin'])
        sentence_raw = ''
        for token in tokens:
            word = token['word']
            begin_offset = int(token['CharacterOffsetBegin'])
            sentence_raw += (begin_offset - last_offset) * ' '
            sentence_raw += word
            last_offset = int(token['CharacterOffsetEnd'])

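        # Record each acceptable (lemma, part of speech) pair along with the
        # word form and the sentence it occurred in.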
        for token in tokens:
            word = token['word']
            lemma = token['lemma']
            tag = token['POS']

            if tag in TAG_TO_PART_OF_SPEECH:
                ok = True
                for filter_fun in filters:
                    if not filter_fun(word, lemma, tag):
                        ok = False
                        break
                if ok:
                    candidate_words[(lemma, TAG_TO_PART_OF_SPEECH[tag])] = {
                        'example_sentence': sentence_raw,
                        'word': word
                    }

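    # Keep only candidates whose lemma appears in neither the known nor the
    # not-known word list.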
    not_known = []
    for ((lemma, part_of_speech), d) in candidate_words.iteritems():
        if lemma not in known_words and lemma not in not_known_words:
            not_known.append((lemma, part_of_speech, d))

    for (lemma, part_of_speech, d) in not_known:
        word = d['word']
        example_sentence = d['example_sentence']
        out = '(%s.) %s' % (
            convert_part_of_speech(part_of_speech),
            lemma
            )

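        # fill_suffix pads the entry to 22 columns; with the 3-character ' # '
        # separator the example sentence starts at column 25, which is why the
        # caret row below is shifted by match_pos + 25.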
        if print_example:
            line = utils.fill_suffix(out, 22, ' ') + ' # ' + example_sentence
            # Escape the word so punctuation tokens are not read as regex syntax.
            match_pos = re.search(re.escape(word), example_sentence).start()
            print line.encode('utf-8')
            print ((match_pos + 25) * ' ') + (len(word) * '^')
        else:
            print out.encode('utf-8')
Example #2
def analyze(filename, known_words_filepath, not_known_words_filepath,
            print_example):
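    """Print lemmas from `filename` that appear in neither word list."""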
    known_words = set(FileUtils.read(known_words_filepath).split())
    not_known_words = set(FileUtils.read(not_known_words_filepath).split())
    tmp_filepath = FileUtils.random_path()
    output_filepath = tmp_filepath + '.xml'
    FileUtils.copy(filename, tmp_filepath)

    corncob_word_list = os.path.join('data', 'corncob_lowercase.txt')
    word_filter = WordFilterFactory.create_word_filter(corncob_word_list)

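    # Run the Stanford CoreNLP pipeline; its XML output is read back below.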
    cmd = [
        'java', '-cp',
        'stanford-corenlp-full/stanford-corenlp-3.3.1.jar:stanford-corenlp-full/stanford-corenlp-3.3.1-models.jar:stanford-corenlp-full/xom.jar:stanford-corenlp-full/joda-time.jar:stanford-corenlp-full/jollyday.jar:stanford-corenlp-full/ejml-0.23.jar',
        '-Xmx2g', 'edu.stanford.nlp.pipeline.StanfordCoreNLP', '-annotators',
        'tokenize,ssplit,pos,lemma', '-file', tmp_filepath, '-outputDirectory',
        '/tmp/'
    ]
    subprocess.call(cmd)
    raw_output = FileUtils.read(output_filepath)
    d = xmltodict.parse(raw_output)
    sentences = d['root']['document']['sentences']['sentence']
    # xmltodict returns a single dict (not a list) when there is only one
    # sentence, so normalize to a list before iterating.
    if not isinstance(sentences, list):
        sentences = [sentences]

    candidate_words = defaultdict(dict)

    def word_filter_fun(word, lemma, tag):
        del word
        del tag
        return word_filter.isok(lemma)

    def adjective_filter_fun(word, lemma, tag):
        del word
        del lemma
        # Reject comparative (JJR) and superlative (JJS) adjective forms.
        return tag not in ('JJR', 'JJS')

    filters = [word_filter_fun, adjective_filter_fun]

    for sentence_dict in sentences:
        tokens = sentence_dict['tokens']['token']
        if not isinstance(tokens, list):
            continue

        last_offset = int(tokens[0]['CharacterOffsetBegin'])
        sentence_raw = ''
        for token in tokens:
            word = token['word']
            begin_offset = int(token['CharacterOffsetBegin'])
            sentence_raw += (begin_offset - last_offset) * ' '
            sentence_raw += word
            last_offset = int(token['CharacterOffsetEnd'])

        for token in tokens:
            word = token['word']
            lemma = token['lemma']
            tag = token['POS']

            if tag in TAG_TO_PART_OF_SPEECH:
                ok = True
                for filter_fun in filters:
                    if not filter_fun(word, lemma, tag):
                        ok = False
                        break
                if ok:
                    candidate_words[(lemma, TAG_TO_PART_OF_SPEECH[tag])] = {
                        'example_sentence': sentence_raw,
                        'word': word
                    }

    not_known = []
    for ((lemma, part_of_speech), d) in candidate_words.iteritems():
        if lemma not in known_words and lemma not in not_known_words:
            not_known.append((lemma, part_of_speech, d))

    for (lemma, part_of_speech, d) in not_known:
        word = d['word']
        example_sentence = d['example_sentence']
        out = '(%s.) %s' % (convert_part_of_speech(part_of_speech), lemma)

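        # The 22-column pad plus ' # ' means the example sentence starts at
        # column 25; the caret row below is shifted to match.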
        if print_example:
            line = utils.fill_suffix(out, 22, ' ') + ' # ' + example_sentence
            match_pos = re.search(re.escape(word), example_sentence).start()
            print line.encode('utf-8')
            print ((match_pos + 25) * ' ') + (len(word) * '^')
        else:
            print out.encode('utf-8')