import sys
import pickle
from struct import pack

def prepare_matches(chunk_fname, keys_out_fname, values_out_fname, pid=""):
    # Binary output streams: keys_out receives pickled (token, freq, offset)
    # records, values_out receives the packed 64-bit codes themselves.
    keys_out = open(keys_out_fname, "wb", buffering=1000000)
    values_out = open(values_out_fname, "wb", buffering=1000000)
    add_values_out = open(values_out_fname + ".add", "w", buffering=1000000)  # opened but not written in this function
    all_codes = []
    tokens = {}
    tokens_freqs = {}
    # Each input line is "<token>\t<code> <code> ...".
    for line in open(chunk_fname):
        token, token_codes = line.strip().split("\t")
        token_codes = [int(code) for code in token_codes.split()]
        tokens.setdefault(token, []).append(len(all_codes))
        all_codes.append(token_codes)
        tokens_freqs[token] = tokens_freqs.get(token, 0) + len(token_codes)
    progress_counter = TCustomCounter("Reducer%s" % str(pid), sys.stdout, verbosity=1, interval=10000)
    for token, chunks in tokens.items():
        token_freq = tokens_freqs[token]
        # Skip tokens too rare or too frequent to be useful in the index.
        if token_freq < MIN_WORD_FREQ_FOR_INDEX or token_freq > MAX_WORD_FREQ_FOR_INDEX:
            continue
        word_codes = []
        for chunk_index in chunks:
            word_codes += all_codes[chunk_index]
            all_codes[chunk_index] = []  # release memory as we merge
        word_codes.sort()
        # Remember where this token's codes start in the values file.
        start_position = values_out.tell()
        for code in word_codes:
            values_out.write(pack("Q", code))
        pickle.dump((token, token_freq, start_position), keys_out)
        progress_counter.add()
    keys_out.close()
    values_out.close()
    add_values_out.close()
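prepare_matches writes two parallel files: keys_out holds pickled (token, frequency, byte offset) triples, and values_out holds each token's sorted codes as consecutive unsigned 64-bit integers starting at that offset, with exactly token_freq codes per token. A minimal reader sketch for that layout follows; load_index and lookup are illustrative helpers, not part of the source.

import pickle
from struct import unpack, calcsize

def load_index(keys_fname):
    # Read pickled (token, freq, offset) records until EOF.
    index = {}
    with open(keys_fname, "rb") as keys_in:
        while True:
            try:
                token, freq, offset = pickle.load(keys_in)
            except EOFError:
                break
            index[token] = (freq, offset)
    return index

def lookup(values_fname, index, token):
    # Seek to the token's offset and unpack its freq consecutive uint64 codes.
    freq, offset = index[token]
    with open(values_fname, "rb") as values_in:
        values_in.seek(offset)
        data = values_in.read(freq * calcsize("Q"))
    return list(unpack("%dQ" % freq, data))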
Example #2
 def crawl_folder(self, folder):
     # Delegates to the module-level crawl_folder() helper, which yields
     # (object_folder, object_id) pairs.
     object_folders = crawl_folder(folder)
     import sys
     processed_counter = TCustomCounter("Crawler, found objects", sys.stderr, self.verbosity, 100)
     for object_folder, object_id in object_folders:
         fields2update = self.crawl_object_fields(object_folder, object_id)
         object2update = TIndexingObjectData(object_id=object_id,
                                             object_fields=fields2update)
         yield object2update
         processed_counter.add()
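Both crawl methods are generators, so nothing is crawled until a caller iterates. A hedged usage sketch, where crawler stands for whatever object defines these methods and index_writer is a hypothetical consumer:

for object2update in crawler.crawl_folder("/data/library"):
    index_writer.update(object2update)  # hypothetical consumer of TIndexingObjectData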
Example #3
 def crawl_csv(self, csv_file_path):
     # Python 2 code: CSV lines are byte strings and must be decoded.
     import sys
     import hashlib
     # Column index -> field name; indices 3-5 are the hierarchy levels,
     # handled separately below.
     field_index2name = {1: "year",
                         2: "udc",
                         #3: "class_level1",
                         #4: "class_level2",
                         #5: "class_level3",
                         6: "pages_count",
                         7: "author",
                         8: "title"}
     hierarchy_indices = [3, 4, 5]

     processed_counter = TCustomCounter("Crawler, found objects", sys.stderr, self.verbosity, 1000)
     # Sniff the file encoding before parsing.
     encoding = chardet.detect(open(csv_file_path).read())['encoding']
     all_hierarchy_codes = {}
     for line in open(csv_file_path):
         line = line.decode(encoding)
         field_values = line.strip().split(";")
         object_id = field_values[0]
         fields = []
         for field_index, field_id in field_index2name.items():
             if len(field_values) > field_index:
                 field_value_encoded = field_values[field_index].encode(DEFAULT_ENCODING)
                 fields.append(TIndexingObjectField(field_id,
                                                    field_value=field_value_encoded,
                                                    field_file_path=""))
         # Library section feature: hash each prefix of the section path
         # (level1, level1;level2, ...) to a numeric code.
         hierarchy_codes = []
         hasher = hashlib.md5()
         path = ""
         for hierarchy_feat_index in hierarchy_indices:
             if len(field_values) <= hierarchy_feat_index:
                 break
             node_name = field_values[hierarchy_feat_index].strip()
             if not node_name:
                 break
             # The digest accumulates over all levels seen so far, so the
             # code identifies the full path prefix, not just this node.
             hasher.update(node_name.encode("utf8"))
             code = int(hasher.hexdigest(), 16) % 1000000007
             path += node_name + ";"
             hierarchy_codes.append(code)
             if code not in all_hierarchy_codes:
                 all_hierarchy_codes[code] = path
             elif all_hierarchy_codes[code] != path:
                 # Two different paths mapped to the same code: abort.
                 print "Hash collision:", path.encode("utf8"), "vs.", all_hierarchy_codes[code].encode("utf8")
                 print "FULL STOP"
                 sys.exit(1)

         fields.append(TIndexingObjectField(field_id=LIB_SECTION_FIELD,
                                            field_value=hierarchy_codes,
                                            field_file_path=""))

         object2update = TIndexingObjectData(object_id=object_id,
                                             object_fields=fields)
         yield object2update
         processed_counter.add()
     """