Python append Examples

Programming Language: Python

Namespace/Package Name: nltk.corpus.brown

Method/Function: append

Examples at hotexamples.com: 11

Python append - 11 examples found. These are the top-rated real-world Python examples of nltk.corpus.brown.append extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def load_file(filename):
    """Load a pickled story dictionary and gather its named-entity tokens.

    The pickle is read from ``'%s/%s' % (ip_dir, filename)``; ``ip_dir`` is a
    module-level name defined outside this function. Stories are visited in
    sorted key order. For each one, the 'PER', 'LOC', 'ORG' and 'ONS' lists
    under ``story['NER']['TITLE_CONTENT']`` are concatenated into a single
    document. Stories whose combined list is empty are skipped entirely.

    Args:
        filename: Name of the pickle file, relative to ``ip_dir``.

    Returns:
        A tuple ``(corpus, doc_ids, event_list, total_no_word)``:
        ``corpus`` is the list of non-empty documents, ``doc_ids`` the
        matching story keys with a trailing '.html' / '.htm' removed,
        ``event_list`` the distinct event ids (the first two '_'-separated
        fields of each doc id) in first-seen order, and ``total_no_word``
        the summed length of all kept documents.
    """
    fname_total = '%s/%s' % (ip_dir, filename)
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only use this loader on trusted data.
    with open(fname_total, 'rb') as f:
        story_dic = pickle.load(f)

    corpus = []
    doc_ids = []
    event_list = []
    seen_events = set()  # O(1) membership test; event_list keeps the order
    total_no_word = 0
    for story in sorted(story_dic):
        entities = story_dic[story]['NER']['TITLE_CONTENT']
        temp_doc = []
        for item in ['PER', 'LOC', 'ORG', 'ONS']:
            temp_doc.extend(entities[item])

        # Remove only a real file extension. str.strip('.html') would strip
        # any leading/trailing '.', 'h', 't', 'm' or 'l' characters and so
        # corrupt ids such as 'health_01_a.html'.
        doc_id = story
        for suffix in ('.html', '.htm'):
            if doc_id.endswith(suffix):
                doc_id = doc_id[:-len(suffix)]
                break
        event_id = '_'.join(doc_id.split('_')[:2])

        if not temp_doc:
            continue
        corpus.append(temp_doc)
        doc_ids.append(doc_id)
        total_no_word += len(temp_doc)
        if event_id not in seen_events:
            seen_events.add(event_id)
            event_list.append(event_id)
    return corpus, doc_ids, event_list, total_no_word

Example #2

Show file

def load_file(filename):
    """Read a UTF-8 text file and split every line on commas.

    Each line is stripped of surrounding whitespace and then split on ','.
    Blank lines are kept and yield ``['']``.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one list of fields per line of the file.
    """
    with codecs.open(filename, encoding='utf8') as freader:
        return [row.strip().split(',') for row in freader]

Example #3

Show file

File: vocabulary.py Project: adityamarella/yelpdatamining

def load_sentences(sentences):
    """Tokenise each sentence into words, dropping sentences with no tokens.

    A token is a run of word characters, optionally followed by an
    apostrophe and more word characters (so "don't" stays one token).

    Args:
        sentences: Iterable of strings.

    Returns:
        A list of token lists, one per sentence that produced any token.
    """
    tokenised = (re.findall(r'\w+(?:\'\w+)?', text) for text in sentences)
    return [words for words in tokenised if words]

Example #4

Show file

def load_dataframe_jp(documents):
    """Tokenise documents with a MeCab tagger created with '-O wakati'.

    Each document is stripped, passed to the tagger, and the returned string
    is split on whitespace. Documents that yield no tokens are kept as empty
    lists.

    Args:
        documents: Iterable of strings.

    Returns:
        A list of token lists, one per input document.
    """
    wakati = MeCab.Tagger('-O wakati')
    # An empty string is parsed once before any real input.
    # NOTE(review): presumably a warm-up workaround for the MeCab binding;
    # confirm before removing.
    wakati.parse("")
    return [wakati.parse(text.strip()).split() for text in documents]

Example #5

Show file

def load_dataframe(documents):
    """Tokenise every document and keep those with at least one token.

    A token is a run of word characters, optionally followed by an
    apostrophe and more word characters.

    Args:
        documents: Iterable of strings.

    Returns:
        A list of token lists, in input order, with empty results omitted.
    """
    find_tokens = re.compile(r'\w+(?:\'\w+)?').findall
    return [tokens for tokens in map(find_tokens, documents) if tokens]

Example #6

Show file

File: vocabulary.py Project: adityamarella/yelpdatamining

def load_file(filename):
    """Read a text file into a list of tokenised documents, one per line.

    A token is a run of word characters, optionally followed by an
    apostrophe and more word characters. Lines without any token are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of token lists, one for every line that produced a token.
    """
    corpus = []
    # The context manager closes the file even if reading or tokenising
    # raises, which the previous open()/close() pair did not guarantee.
    with open(filename, 'r') as f:
        for line in f:
            doc = re.findall(r'\w+(?:\'\w+)?', line)
            if doc:
                corpus.append(doc)
    return corpus

Example #7

Show file

File: vocabulary.py Project: eziaeika/100DaysOfMLCode

def load_file(filename):
    """Tokenise a text file line by line.

    Every line is split into tokens (a run of word characters, optionally
    followed by an apostrophe and more word characters). Lines that yield no
    token are left out of the result.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of token lists, in file order.
    """
    # Compile once instead of looking the pattern up for every line.
    find_tokens = re.compile(r'\w+(?:\'\w+)?').findall
    # ``with`` guarantees the handle is released even when reading fails.
    with open(filename, 'r') as f:
        return [doc for doc in map(find_tokens, f) if doc]

Example #8

Show file

def load_file(filename):
    """Load one text file in which each line corresponds to one document.

    Each line is tokenised into runs of word characters, optionally followed
    by an apostrophe and more word characters. Lines with no token do not
    produce a document.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of documents, each a list of token strings.
    """
    token_pattern = re.compile(r'\w+(?:\'\w+)?')
    corpus = []
    # Use a context manager so the file is closed on every exit path,
    # including exceptions raised while reading.
    with open(filename, 'r') as f:
        for line in f:
            doc = token_pattern.findall(line)
            if not doc:
                continue
            corpus.append(doc)
    return corpus

Example #9

Show file

def load_file_reuter(filename):
    """Load a pickled story dictionary keyed by story id.

    The pickle is read from ``'%s/%s' % (ip_dir, filename)``; ``ip_dir`` is a
    module-level name defined outside this function. Stories are visited in
    sorted key order and each story's 'content' value is appended to the
    corpus unchanged.

    Args:
        filename: Name of the pickle file, relative to ``ip_dir``.

    Returns:
        A tuple ``(corpus, doc_ids, event_list, total_no_word)``:
        ``corpus`` holds each story's 'content', ``doc_ids`` the story keys,
        ``event_list`` the distinct 'topic' values in first-seen order, and
        ``total_no_word`` is always 0 (see the note in the body).
    """
    fname_total = '%s/%s' % (ip_dir, filename)
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only use this loader on trusted data.
    with open(fname_total, 'rb') as f:
        story_dic = pickle.load(f)

    corpus = []
    doc_ids = []
    event_list = []
    # NOTE(review): this counter is never incremented here, so callers always
    # receive 0. Kept as-is because the type of 'content' is not visible from
    # this function; confirm whether a word count is expected.
    total_no_word = 0
    for story in sorted(story_dic):
        record = story_dic[story]
        event_id = record['topic']
        corpus.append(record['content'])
        doc_ids.append(story)
        if event_id not in event_list:
            event_list.append(event_id)
    return corpus, doc_ids, event_list, total_no_word

Example #10

Show file

def load_file(filename):
    """Load a pickled story dictionary and tokenise each story's text.

    Stories are visited in sorted key order. For each story, the 'CONTENT'
    and 'TITLE' values (assumed to be strings) are joined and tokenised into
    runs of word characters, optionally followed by an apostrophe and more
    word characters. Stories that yield no token are skipped.

    Args:
        filename: Path of the pickle file.

    Returns:
        A tuple ``(corpus, doc_ids, event_list)``: ``corpus`` is the list of
        token lists, ``doc_ids`` the matching story keys with a trailing
        '.html' / '.htm' removed, and ``event_list`` the distinct event ids
        (the first two '_'-separated fields of each doc id) in first-seen
        order.
    """
    # Pickle data is binary: text mode ('r') fails on Python 3 and can
    # corrupt the stream on platforms that translate newlines.
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only use this loader on trusted data.
    with open(filename, 'rb') as f:
        story_dic = pickle.load(f)

    token_pattern = re.compile(r'\w+(?:\'\w+)?')
    corpus = []
    doc_ids = []
    event_list = []
    seen_events = set()  # O(1) membership test; event_list keeps the order
    for story in sorted(story_dic):
        record = story_dic[story]
        # Join with a space so the last word of the content and the first
        # word of the title are not fused into a single token.
        title_content = record['CONTENT'] + ' ' + record['TITLE']
        doc = token_pattern.findall(title_content)  # tokenizing here

        # Remove only a real file extension. str.strip('.html') would strip
        # any leading/trailing '.', 'h', 't', 'm' or 'l' characters and so
        # corrupt ids such as 'health_01_a.html'.
        doc_id = story
        for suffix in ('.html', '.htm'):
            if doc_id.endswith(suffix):
                doc_id = doc_id[:-len(suffix)]
                break
        event_id = '_'.join(doc_id.split('_')[:2])

        if not doc:
            continue
        corpus.append(doc)
        doc_ids.append(doc_id)
        if event_id not in seen_events:
            seen_events.add(event_id)
            event_list.append(event_id)
    return corpus, doc_ids, event_list

Example #11

Show file

File: vocabulary.py Project: whitezhang/TopicModel

def load_file(filename, format=False):
    """Read a text file into a list of tokenised documents.

    A token is a run of word characters, optionally followed by an
    apostrophe and more word characters.

    Args:
        filename: Path of the text file to read.
        format: When equal to ``False`` (the default), the file is opened
            with the platform default encoding and lines without tokens are
            skipped. Otherwise the file is decoded as UTF-8 and a line
            without tokens repeats the previous document, so the result has
            exactly one entry per line of the file.

    Returns:
        A list of token lists. In the aligned (``format`` not False) mode,
        token-less lines that come before the first tokenised line are
        represented by the empty string ``""``, and repeated entries are the
        same list object as the document they repeat.
    """
    token_pattern = re.compile(r'\w+(?:\'\w+)?')
    corpus = []
    # Keep the exact legacy comparison: values such as None or '' are not
    # equal to False and must still select the UTF-8 branch.
    if format == False:  # noqa: E712
        with open(filename, 'r') as f:
            for line in f:
                doc = token_pattern.findall(line)
                if doc:
                    corpus.append(doc)
        return corpus

    with codecs.open(filename, 'r', 'utf-8') as f:
        pre_doc = ""
        for line in f:
            # Match on the decoded text. Re-encoding the line to bytes makes
            # a str pattern raise TypeError on Python 3.
            doc = token_pattern.findall(line)
            if doc:
                corpus.append(doc)
                pre_doc = doc
            else:
                # Repeat the previous document to keep one entry per line.
                corpus.append(pre_doc)
    return corpus