def scan(out_fd):
    """Match every S file in the temp directory against the Q files.

    Changes the working directory to ``TEMP_DIRNAME``, parses each file in
    ``CURR_DIR`` whose name contains ``S_PREFIX`` with ``proccess_input`` and
    passes the parsed result to ``scan_trough_qs``.  The match counts returned
    by ``scan_trough_qs`` are summed and the total is printed.

    Args:
        out_fd: Output handle, forwarded unchanged to ``scan_trough_qs``.
    """
    # NOTE: the working directory is left at TEMP_DIRNAME on return;
    # scan_trough_qs lists CURR_DIR, so it presumably relies on that.
    os.chdir(TEMP_DIRNAME)

    # Substring match on the file name, not a strict prefix match.
    s_files = [name for name in os.listdir(CURR_DIR) if S_PREFIX in name]

    hash_match = 0
    for s_name in s_files:
        # Close each file as soon as it has been parsed instead of leaking
        # one open handle per file (assumes proccess_input reads its input
        # eagerly -- confirm against its definition).
        with f(s_name) as s_fd:
            s_hash = proccess_input(s_fd)
        hash_match += scan_trough_qs(s_hash, out_fd)

    print("Found {} entries".format(hash_match))
def make_parts(in_fd, prefix='unkn_'):
    """Split the lines of *in_fd* into numbered shard files in TEMP_DIRNAME.

    Shards are named ``<prefix>0``, ``<prefix>1``, ... and each receives at
    most ``SHARD_SIZE`` lines.  Shard 0 is always created, even when *in_fd*
    yields no lines.

    Args:
        in_fd: Open, iterable file-like object to read lines from.
        prefix: File-name prefix for the shard files.

    Returns:
        The index of the last shard written, or 1 when only shard 0 was
        written.  NOTE(review): this is not the number of shards (two shards
        also give 1); kept as-is -- confirm what callers expect.
    """
    # Remember where we started so the caller's working directory is restored
    # even if TEMP_DIRNAME is nested/absolute or an error occurs mid-way.
    start_dir = os.getcwd()
    os.chdir(TEMP_DIRNAME)
    try:
        out_file_id = 0
        out_fd = f('{}{}'.format(prefix, out_file_id), mode='w')
        try:
            for n, line in enumerate(in_fd):
                # Roll over to a new shard every SHARD_SIZE lines.
                if n != 0 and n % SHARD_SIZE == 0:
                    out_fd.close()
                    out_file_id += 1
                    out_fd = f('{}{}'.format(prefix, out_file_id), mode='w')
                out_fd.write(line)
        finally:
            # Always release the current shard, including on read/write errors.
            out_fd.close()
    finally:
        os.chdir(start_dir)

    return out_file_id if out_file_id > 0 else 1
def scan_trough_qs(s_hash, out_fd):
    """Compare *s_hash* against every Q file and write the merged results.

    Each file in ``CURR_DIR`` whose name contains ``Q_PREFIX`` is parsed with
    ``proccess_input`` and compared to *s_hash* via ``compare_hashes``, which
    returns a ``(match_count, merged_lines)`` pair.

    Args:
        s_hash: Parsed S-side data, as produced by ``proccess_input``.
        out_fd: Writable handle that receives the merged lines.

    Returns:
        The sum of the match counts over all Q files.
    """
    # Substring match on the file name, not a strict prefix match.
    q_files = [name for name in os.listdir(CURR_DIR) if Q_PREFIX in name]

    hash_match = 0
    for q_name in q_files:
        # Close each file right after parsing rather than leaking the handle
        # (assumes proccess_input reads its input eagerly -- confirm).
        with f(q_name) as q_fd:
            q_hash = proccess_input(q_fd)
        m, merged = compare_hashes(s_hash, q_hash)
        hash_match += m
        # Write to the handle we were given; the original wrote to a name
        # `out` that is not a parameter of this function.
        out_fd.writelines(merged)
    return hash_match
def runtime():
    """Dispatch every line of the file named by ``fname`` to its request.

    Each line is stripped of newline characters and split on tabs; the second
    field is the request id.  The matching ``Request`` is looked up in
    ``curr_reqs`` (created and registered on first sight) and the full token
    list is passed to its ``proccess`` method.

    Raises:
        IndexError: If a line has fewer than two tab-separated fields.
    """
    with f(fname) as fd:
        for line in fd:
            tokens = line.strip('\n').split('\t')
            req_id = tokens[1]

            req = curr_reqs.get(req_id)
            if req is None:
                # First line seen for this id: create and register a request.
                req = Request()
                curr_reqs[req_id] = req
            req.proccess(tokens)
# print( input_file.readlines() ) for line in input_file.readlines(): num_id = line[:9] data = line[10:].strip( '\n' ) result.setdefault( num_id, [] ).append( data ) return result if __name__ == '__main__': if len( argv ) < 3: print ( '{0} usage syntax {0} <input a file name> <input b file name>'.format( 'simple.py' ) ) quit() fname_a = argv[1] fname_b = argv[2] fname_res = 'result.csv' input_a = f( fname_a, buffering = 1 ) input_b = f( fname_b, buffering = 1 ) out = f( fname_res, mode = 'w' ) dict_a = {} dict_b = {} merged = [] dict_a = proccess_input( input_a ) dict_b = proccess_input( input_b ) """ print( 'dict_a') print( dict_a ) print( 'dict_b') print( dict_b ) """ for k, a_v in dict_a.items():