Exemple #1
1
def parse_sgml(r, table):
    start_time = time.time()
    total_topics, total_texts = prepare_reuters21578(r)
    dict = dict_by_dict(r, total_texts, table)

    madlib = open(table + '.madlib', 'w')

    j = 1
    for key, value in total_texts.iteritems():
        #print key
        lines = value.split('\n')
        words = utility.get_file_words(lines)
   
        if len(words) == 0: continue
        #for madlib

        line = utility.get_madlib_line(words, dict)
        madlib.write(('%s : %s') % (str(j), line))
        j = j + 1
    madlib.close()
    elapsed_time = time.time() - start_time
    print r, 'parsing time:', elapsed_time
Exemple #2
0
def parse_sgml(r, table):
    start_time = time.time()
    total_topics, total_texts = prepare_reuters21578(r)
    dict = dict_by_dict(r, total_texts, table)

    madlib = open(table + '.madlib', 'w')

    j = 1
    for key, value in total_texts.iteritems():
        #print key
        lines = value.split('\n')
        words = utility.get_file_words(lines)

        if len(words) == 0: continue
        #for madlib

        line = utility.get_madlib_line(words, dict)
        madlib.write(('%s : %s') % (str(j), line))
        j = j + 1
    madlib.close()
    elapsed_time = time.time() - start_time
    print r, 'parsing time:', elapsed_time
Exemple #3
0
def parse_normal(r, parse_method, dict_method, table):
    dataset = table
    start_time = time.time()
    #topic_method(r)
    file_path_list, file_name_list = utility.get_file_list(r)
    dict = dict_method(r, table, parse_method)

    madlib = open(dataset + '.madlib', 'w')

    file_count = len(file_path_list)

    for i in range(0, file_count):
        lines = parse_method(file_path_list[i])
        words = utility.get_file_words(lines)
        if len(words) == 0: continue
        #for madlib

        line = utility.get_madlib_line(words, dict)
        madlib.write(('%s : %s') % (str(i + 1), line))

    madlib.close()
    elapsed_time = time.time() - start_time
    print r, 'parsing time:', elapsed_time
Exemple #4
0
def parse_normal(r, parse_method, dict_method, table):
    dataset = table
    start_time = time.time()
    #topic_method(r)
    file_path_list, file_name_list = utility.get_file_list(r)
    dict  = dict_method(r, table, parse_method)

    madlib = open(dataset + '.madlib', 'w')

    file_count = len(file_path_list)

    for i in range(0, file_count):
        lines = parse_method(file_path_list[i])
        words = utility.get_file_words(lines)
        if len(words) == 0: continue
        #for madlib
        
        line = utility.get_madlib_line(words, dict)
        madlib.write(('%s : %s') % (str(i + 1), line))

    madlib.close()
    elapsed_time = time.time() - start_time
    print r, 'parsing time:', elapsed_time