コード例 #1
0
def load_ch_matrix():
    """Load the node-ID map, hypergraph, characteristic matrix and index map.

    Each file name is built by appending a dataset-specific suffix to
    ``path``; ``path``, ``dataset`` and ``time_format`` are names defined
    outside this function. For every file the start time and the elapsed
    seconds are printed, followed by a divider line.

    Returns:
        The tuple ``(ch_matrix, hypergraph, index_node_map, node_id_map)``.
    """
    divider = "-----------------------------------------"

    def report(*fields):
        # Fields are separated by one space, like a comma-separated print.
        # A single parenthesised argument prints the same text whether
        # ``print`` is a statement or a function.
        print(" ".join(str(field) for field in fields))

    report("Reading NodeID map started at", time.strftime(time_format))
    began = time.time()
    node_id_map = inout.load_from_file(
        path + "{0}_node_id_map".format(dataset))
    report("Reading NodeID map took", time.time() - began, "s")
    report(divider)

    report("Reading hypergraph started at", time.strftime(time_format))
    began = time.time()
    hypergraph = Hypergraph.load_from_file(
        path + "{0}_hgraph".format(dataset))
    report("Reading hypergraph took", time.time() - began, "s")
    report(divider)

    report("Reading characteristic matrix started at",
           time.strftime(time_format))
    began = time.time()
    ch_matrix = CharacteristicMatrix.load_from_file(
        path + "{0}_ch_matrix".format(dataset))
    report("Reading characteristic matrix took", time.time() - began, "s")
    report(divider)

    report("Reading Column index to Node map started at",
           time.strftime(time_format))
    began = time.time()
    index_node_map = inout.load_from_file(
        path + "{0}_index_node_map".format(dataset))
    report("Reading Column index to Node map took", time.time() - began, "s")
    report(divider)

    return ch_matrix, hypergraph, index_node_map, node_id_map
コード例 #2
0
def load_ch_matrix():
    """Read the four dataset files and return their contents.

    The NodeID map, hypergraph, characteristic matrix and column-index-to-
    node map are read in that order from ``path`` plus a dataset-specific
    suffix. Progress (start time, elapsed seconds, divider) is printed for
    each of them.

    Returns:
        ``(ch_matrix, hypergraph, index_node_map, node_id_map)``
    """
    def timed(started_label, took_label, loader):
        # Announce the step, run the deferred load, then report how long it
        # took. The loader is a callable so that all of its work happens
        # after the clock has been started.
        print(" ".join([started_label, time.strftime(time_format)]))
        began = time.time()
        loaded = loader()
        print(" ".join([took_label, str(time.time() - began), "s"]))
        print("-----------------------------------------")
        return loaded

    node_id_map = timed(
        "Reading NodeID map started at",
        "Reading NodeID map took",
        lambda: inout.load_from_file(path + "{0}_node_id_map".format(dataset)))

    hypergraph = timed(
        "Reading hypergraph started at",
        "Reading hypergraph took",
        lambda: Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset)))

    ch_matrix = timed(
        "Reading characteristic matrix started at",
        "Reading characteristic matrix took",
        lambda: CharacteristicMatrix.load_from_file(path + "{0}_ch_matrix".format(dataset)))

    index_node_map = timed(
        "Reading Column index to Node map started at",
        "Reading Column index to Node map took",
        lambda: inout.load_from_file(path + "{0}_index_node_map".format(dataset)))

    return ch_matrix, hypergraph, index_node_map, node_id_map
コード例 #3
0
def build_svmlight_chemical_data(in_files, wl_iterations, output_dir, format_rdf=False, compounds_targets_file=None, uri_prefix=None,
                                 shingles_type="features", window_size=5, accumulate_wl_shingles=True, fingerprints=False,
                                 sort_rdf_nodes_before_processing=True, state_input_file=None, state_output_file=None,
                                 save_just_last_wl_it=False):
    """Process a chemical dataset into one output file per WL iteration.

    ``wl_iterations + 1`` files named ``svm_light_data_wl_<i>`` are opened
    for writing. A callback wrapping ``process_record`` is handed to the
    dataset reader, the resulting database is iterated to completion while
    the index of every record is printed, and the files are closed. The
    files are closed even if loading, reading or processing raises.

    Args:
        in_files: Input location(s). With ``format_rdf`` it must be a list;
            otherwise, if a list is given, only its first element is used.
        wl_iterations: Number of iterations; forwarded to ``process_record``
            (presumably Weisfeiler-Lehman iterations - confirm).
        output_dir: Prefix concatenated directly in front of each output
            file name, so it has to end with a path separator.
        format_rdf: Read the input with ``prepare_rdf_chemical_data``
            instead of ``read_chemical_compounts``.
        compounds_targets_file: Required (truthy) when ``format_rdf`` is set;
            forwarded to ``prepare_rdf_chemical_data``.
        uri_prefix: Required (truthy) when ``format_rdf`` is set; forwarded
            to ``prepare_rdf_chemical_data``.
        shingles_type: Forwarded to ``process_record``.
        window_size: Forwarded to ``process_record``.
        accumulate_wl_shingles: Forwarded to ``process_record``. When no
            state is loaded and ``fingerprints`` is false, it also selects
            between one shared ``next_shingle_id`` counter and one
            ``wl_<i>_next_shingle_id`` counter per iteration.
        fingerprints: Forwarded to ``process_record``; when true no shingle
            id counters are initialised.
        sort_rdf_nodes_before_processing: Forwarded to
            ``prepare_rdf_chemical_data``.
        state_input_file: If truthy, the processing state is loaded from
            this file with ``inout.load_from_file`` instead of being created.
        state_output_file: If truthy, the final state (without the file
            handles) is saved there with ``inout.save_to_file``.
        save_just_last_wl_it: Forwarded to ``process_record``.

    Raises:
        AssertionError: If ``format_rdf`` is set and ``in_files`` is not a
            list, or ``compounds_targets_file`` / ``uri_prefix`` is falsy.
    """
    if format_rdf:
        assert isinstance(in_files, list)
        assert bool(compounds_targets_file)
        assert bool(uri_prefix)
    elif isinstance(in_files, list):
        in_files = in_files[0]

    files = []

    try:
        # Opening happens inside the try block so that a failure on a later
        # file still closes the ones that were already opened.
        for i in range(wl_iterations + 1):
            files.append(open(output_dir + "svm_light_data_wl_{0}".format(i), "w"))

        if state_input_file:
            state = inout.load_from_file(state_input_file)
            state['files'] = files
        else:
            wl_state = {"wl_state": None}
            shingle_id_map = {}
            if not fingerprints:
                if accumulate_wl_shingles:
                    wl_state["next_shingle_id"] = 1
                else:
                    for i in range(wl_iterations + 1):
                        wl_state["wl_{0}_next_shingle_id".format(i)] = 1

            state = {
                "files": files,
                "wl_state": wl_state,
                "shingle_id_map": shingle_id_map,
                "rdf_colors": {'colors': None, 'next_color_id': None}
            }

        def process_compound(chem_record):
            process_record(chem_record, wl_iterations, state, binary_target_labels=True,
                           shingles_type=shingles_type, window_size=window_size,
                           accumulate_wl_shingles=accumulate_wl_shingles,
                           fingerprints=fingerprints, save_just_last_wl_it=save_just_last_wl_it)

        if format_rdf:
            chem_database, state['rdf_colors'] = prepare_rdf_chemical_data(
                in_files, compounds_targets_file, uri_prefix, process_compound,
                sort_rdf_nodes_before_processing=sort_rdf_nodes_before_processing,
                rdf_colors_state=state['rdf_colors'])
        else:
            chem_database = read_chemical_compounts(in_files, process_compound)

        # The database is consumed only for its side effects; the loop
        # prints the running record index.
        for i, _ in enumerate(chem_database):
            print(i)
    finally:
        for f in files:
            f.close()

    # The file handles are dropped from the state before it is saved.
    del state['files']

    if state_output_file:
        inout.save_to_file(state, state_output_file)

    print("Done.")
コード例 #4
0
def load_sketch_matrix():
    """Read the NodeID map, sketch matrix and column-index-to-node map.

    The three files are read in that order from ``path`` plus a
    dataset-specific suffix. For each one the start time and the elapsed
    seconds are printed; a divider line follows the second and third step
    only.

    Returns:
        ``(sketch_matrix, index_node_map, node_id_map)``
    """
    # (start message, finish message, deferred load, print divider afterwards)
    steps = (
        ("Reading NodeID map started at",
         "Reading NodeID map took",
         lambda: inout.load_from_file(path + "{0}_node_id_map".format(dataset)),
         False),
        ("Reading sketch matrix started at",
         "Reading sketch matrix took",
         lambda: SketchMatrix.load_from_file(path + "{0}_sketch".format(dataset)),
         True),
        ("Reading Column index to Node map started at",
         "Reading Column index to Node map took",
         lambda: inout.load_from_file(path + "{0}_index_node_map".format(dataset)),
         True),
    )

    loaded = []
    for started_label, took_label, loader, rule_after in steps:
        print(" ".join([started_label, time.strftime(time_format)]))
        began = time.time()
        loaded.append(loader())
        print(" ".join([took_label, str(time.time() - began), "s"]))
        if rule_after:
            print("-----------------------------------------")

    node_id_map, sketch_matrix, index_node_map = loaded
    return sketch_matrix, index_node_map, node_id_map
コード例 #5
0
def load_sketch_matrix():
    """Load the node-ID map, the sketch matrix and the index-to-node map.

    File names are ``path`` followed by a suffix formatted with
    ``dataset``; both names, like ``time_format``, are defined outside this
    function. Start time and elapsed seconds are printed for every file. A
    divider line is printed after the sketch matrix and after the index
    map, but not after the node-ID map.

    Returns:
        The tuple ``(sketch_matrix, index_node_map, node_id_map)``.
    """
    divider = "-----------------------------------------"

    def report(*fields):
        # Fields are separated by one space, like a comma-separated print.
        print(" ".join(str(field) for field in fields))

    report("Reading NodeID map started at", time.strftime(time_format))
    began = time.time()
    node_id_map = inout.load_from_file(
        path + "{0}_node_id_map".format(dataset))
    report("Reading NodeID map took", time.time() - began, "s")

    report("Reading sketch matrix started at", time.strftime(time_format))
    began = time.time()
    sketch_matrix = SketchMatrix.load_from_file(
        path + "{0}_sketch".format(dataset))
    report("Reading sketch matrix took", time.time() - began, "s")
    report(divider)

    report("Reading Column index to Node map started at",
           time.strftime(time_format))
    began = time.time()
    index_node_map = inout.load_from_file(
        path + "{0}_index_node_map".format(dataset))
    report("Reading Column index to Node map took", time.time() - began, "s")
    report(divider)

    return sketch_matrix, index_node_map, node_id_map
コード例 #6
0
ファイル: temp.py プロジェクト: idanivanov/master_thesis
 def read_r_balls_database(r_balls_directory, r_balls_count):
     """Lazily yield the records stored as ``r_ball_0`` .. ``r_ball_<count-1>``.

     Each file name is ``r_balls_directory`` concatenated directly with
     ``r_ball_<i>`` and is loaded with ``inout.load_from_file`` only when
     the generator reaches it.
     """
     for ball_index in range(r_balls_count):
         yield inout.load_from_file(r_balls_directory + "r_ball_{0}".format(ball_index))
コード例 #7
0
 def load_from_file(in_file, compressed=True):
     """Return whatever ``inout.load_from_file`` loads from ``in_file``.

     ``compressed`` is passed through unchanged as the second positional
     argument.
     """
     loaded = inout.load_from_file(in_file, compressed)
     return loaded
コード例 #8
0
 def read_r_balls_database(r_balls_directory, r_balls_count):
     """Generate the r-ball records numbered ``0`` to ``r_balls_count - 1``.

     Records are loaded one at a time with ``inout.load_from_file`` from
     ``r_balls_directory`` concatenated with ``r_ball_<i>``.
     """
     for position in range(r_balls_count):
         file_name = r_balls_directory + "r_ball_{0}".format(position)
         yield inout.load_from_file(file_name)
コード例 #9
0
 def load_from_file(in_file, compressed=True):
     """Thin wrapper that forwards both arguments to ``inout.load_from_file``."""
     call_args = (in_file, compressed)
     return inout.load_from_file(*call_args)