def main():
    # test the performance of the engineered features
    # edges list and nodes list file
    args = ArgParser.parse_args()
    main_path = args.main_path + 'inputs/'
    pfilename = args.graph
    node_filename = main_path + 'nodes_' + pfilename + '.csv'
    edge_filename = main_path + 'edges_' + pfilename + '.csv'

    print('Read edge list and node list.')
    edges_df = pd.read_csv(edge_filename)
    nodes_df = pd.read_csv(node_filename)

    # get anchor nodes list
    print('Retrieve anchor nodes list.')
    anchor_nodes = nodes_df.loc[nodes_df['is_anchor'] == 1]['node'].tolist()

    # --- generate features for all anchor nodes of a graph ---
    print('Generate features for the anchor nodes.')
    n_eng_features = NodeEngFeatures(nodes_df, edges_df)
    node_feature_df = n_eng_features.gen_node_features_list(anchor_nodes)

    # save anchor nodes features to file
    print(node_feature_df.head())
    print('Save node features dataframe.')
    node_features_filename = main_path + 'features_eth_0.csv'
    node_feature_df.to_csv(node_features_filename, index=False)
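# NodeEngFeatures is defined elsewhere in the project; a minimal sketch of the
# interface assumed by main() above. The class and method names come from the
# call site, but the feature columns here (in/out degree) are illustrative
# placeholders, not the project's actual engineered features.
class NodeEngFeaturesSketch:
    def __init__(self, nodes_df, edges_df):
        self.nodes_df = nodes_df
        self.edges_df = edges_df

    def gen_node_features_list(self, anchor_nodes):
        # simple structural features per anchor node
        out_deg = self.edges_df.groupby('source').size()
        in_deg = self.edges_df.groupby('target').size()
        rows = [{'node': n,
                 'out_degree': int(out_deg.get(n, 0)),
                 'in_degree': int(in_deg.get(n, 0))}
                for n in anchor_nodes]
        return pd.DataFrame(rows)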
def runDielec(arg):
    """
    Runs UHBD calculation with a set of dielectric constants in dielc_file.
    """

    # Parse the command line arguments
    dielec = ArgParser.modeClass("dielec",["pdb_file","dielec_file"],
                                 ["inpfile","inpfile"],
                                 ["ionic_strength","pHtitr","out_dir"])
    dielec.parseArg(arg)

    filename = dielec.required["pdb_file"]
    dielec_file = dielec.required["dielec_file"]
    output_path = dielec.optional.out_dir
    pH_start = dielec.optional.pHtitr[0]
    pH_stop = dielec.optional.pHtitr[1]
    pH_interval = dielec.optional.pHtitr[2]
    salt = dielec.optional.ionic_strength

    # Read the list of dielectric constants
    f = open(dielec_file)
    dielectric_constants = [float(d) for d in f.readlines()]
    f.close()

    # Run UHBD once for every dielectric constant
    base_path = os.path.join(__init__.invocation_path,output_path)
    for dielectric in dielectric_constants:
        print "dielectric constant: %4.2F" % dielectric
        dielec_path = os.path.join(base_path,"D%.1F" % dielectric)
        FileOps.makeDir(dielec_path)
        dielec_output = os.path.join(dielec_path,"%.1F" % salt)
        FileOps.makeDir(dielec_output)
        indivRun(filename,dielec_output,pH_start,pH_stop,pH_interval,salt,
                 dielectric)
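# FileOps.makeDir is not shown in this file; a plausible sketch, assuming it
# creates a directory and tolerates one that already exists (the same EEXIST
# handling that the older runSalts/runDielec versions below do inline with
# errno 17):
import errno
import os

def makeDir(path):
    try:
        os.mkdir(path)
    except OSError as e:
        if e.errno != errno.EEXIST:  # only ignore "directory already exists"
            raise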
def main(): """ end-to-end classification """ binary = True # binary or multi-class classification. args = ArgParser.parse_args() input_path = args.input graph_filename = args.graph prod_data_dir = args.output flag = args.flag clf_opt = args.clf_opt exp_id = args.exp_id # data_path = main_path + 'inputs/' # prod_data_dir = main_path + 'outputs/' stats_file = prod_data_dir + 'stats_' + flag + '.csv' edges_filename = input_path + 'edges_' + graph_filename + '.csv' nodes_filename = input_path + 'nodes_' + graph_filename + '.csv' features_filename = prod_data_dir + 'features_' + graph_filename + '.csv' feat_imp_filename = prod_data_dir + 'feature_importance_' + graph_filename + '.csv' if clf_opt == 'fe': # ------------------ Feature Engineering ------------------ # read the input file and generating the features and the labels set print("Node Classification --- Feature Engineering ---") EF_analysis_selected_nodes(prod_data_dir, graph_filename, edges_filename, nodes_filename, features_filename, stats_file, feat_imp_filename, 'FE', binary, rnd_seed, exp_id, extra_analysis=False) print("--- Node Classification Feature Engineering is done ---") # --------------------------------------------------------- elif clf_opt == 'concat': print("Node classification: Concat. FE &" + flag + " embeddings.") emb_file = prod_data_dir + 'emb_' + str(flag) + '_' + graph_filename + '.emb' fe_file = features_filename nd_clf_fe_emb_combined(emb_file, fe_file, stats_file, flag, binary, exp_id) else: # ------------------ RiWalk ------------------------------- print("Node classification: --- RiWalk - " + flag + "---") # set file names emb_filename = prod_data_dir + 'emb_' + str(flag) + '_' + graph_filename + '.emb' RiWalk_analysis_selected_nodes(prod_data_dir, graph_filename, emb_filename, nodes_filename, stats_file, flag, binary, exp_id, extra_analysis=False) print("--- Classification RiWalk is done ---")
def runSingle(arg):
    """
    Runs UHBD calculation on a single file.
    """

    # Parse the command line arguments
    single = ArgParser.modeClass("single",["pdb_file"],["inpfile"],
                                 ["dielectric","ionic_strength","pHtitr",
                                  "out_dir"])
    single.parseArg(arg)

    indivRun(single.required["pdb_file"],single.optional.out_dir,
             single.optional.pHtitr[0],single.optional.pHtitr[1],
             single.optional.pHtitr[2],single.optional.ionic_strength,
             single.optional.dielectric)
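# indivRun is the shared worker that runSingle, runSalts, and runDielec all
# delegate to; its body is not shown here. From the call sites, its assumed
# signature is:
#
#   indivRun(pdb_file, out_dir, pH_start, pH_stop, pH_interval,
#            ionic_strength, dielectric)
#
# i.e. one UHBD titration run for a single (salt, dielectric) combination,
# writing its output under out_dir.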
def runSalts(arg):
    """
    Runs UHBD calculation on set of salts in salts_file.
    """

    # Parse the command line arguments
    salts = ArgParser.modeClass("salts",["pdb_file","salt_file"],
                                ["inpfile","inpfile"],
                                ["dielectric","pHtitr","out_dir"])
    salts.parseArg(arg)

    filename = salts.required["pdb_file"]
    salts_file = salts.required["salt_file"]
    output_path = salts.optional.out_dir
    pH_start = salts.optional.pHtitr[0]
    pH_stop = salts.optional.pHtitr[1]
    pH_interval = salts.optional.pHtitr[2]
    dielectric = salts.optional.dielectric

    # Execute the indivRun function for every salt in salts_file
    f = open(salts_file)
    salts = [float(salt) for salt in f.readlines()]
    f.close()

    for salt in salts:
        print "Ionic strength: %4.2F" % salt
        salt_output = os.path.join(output_path,"%.1F" % salt)

        # Create output directory; errno 17 is EEXIST
        try:
            os.mkdir(os.path.join(__init__.invocation_path,salt_output))
        except OSError, value:
            # Don't stop if we are only overwriting an existing directory
            if value[0] != 17:
                print 'File error.'
                print value[0], os.path.join(__init__.invocation_path,
                                             salt_output), value[1]
                sys.exit()

        # Run UHBD at this specific salt concentration
        indivRun(filename,salt_output,pH_start,pH_stop,pH_interval,salt,
                 dielectric)
def runDielec(arg):
    """
    Runs UHBD calculation with a set of dielectric constants in dielc_file.
    """

    # Parse the command line arguments
    dielec = ArgParser.modeClass("dielec",["pdb_file","dielec_file"],
                                 ["inpfile","inpfile"],
                                 ["ionic_strength","pHtitr","out_dir"])
    dielec.parseArg(arg)

    filename = dielec.required["pdb_file"]
    dielec_file = dielec.required["dielec_file"]
    output_path = dielec.optional.out_dir
    pH_start = dielec.optional.pHtitr[0]
    pH_stop = dielec.optional.pHtitr[1]
    pH_interval = dielec.optional.pHtitr[2]
    salt = dielec.optional.ionic_strength

    f = open(dielec_file)
    dielectric_constants = [float(d) for d in f.readlines()]
    f.close()

    for dielectric in dielectric_constants:
        print "dielectric constant: %4.2F" % dielectric
        dielec_output = os.path.join(output_path,"%.1F" % dielectric)

        # Create output directory; errno 17 is EEXIST
        try:
            os.mkdir(os.path.join(__init__.invocation_path,dielec_output))
        except OSError, value:
            # Don't stop if we are only overwriting an existing directory
            if value[0] != 17:
                print 'File error.'
                print value[0], os.path.join(__init__.invocation_path,
                                             dielec_output), value[1]
                sys.exit()

        indivRun(filename,dielec_output,pH_start,pH_stop,pH_interval,salt,
                 dielectric)
def runSalts(arg):
    """
    Runs UHBD calculation on set of salts in salts_file.
    """

    # Parse the command line arguments
    salts = ArgParser.modeClass("salts",["pdb_file","salt_file"],
                                ["inpfile","inpfile"],
                                ["dielectric","pHtitr","out_dir"])
    salts.parseArg(arg)

    filename = salts.required["pdb_file"]
    salts_file = salts.required["salt_file"]
    output_path = salts.optional.out_dir
    pH_start = salts.optional.pHtitr[0]
    pH_stop = salts.optional.pHtitr[1]
    pH_interval = salts.optional.pHtitr[2]
    dielectric = salts.optional.dielectric

    # Execute the indivRun function for every salt in salts_file
    f = open(salts_file)
    salts = [float(salt) for salt in f.readlines()]
    f.close()

    base_path = os.path.join(__init__.invocation_path,output_path)
    dielec_path = os.path.join(base_path,"D%.1F" % dielectric)
    FileOps.makeDir(dielec_path)

    for salt in salts:
        print "Ionic strength: %4.2F" % salt
        salt_output = os.path.join(dielec_path,"%.1F" % salt)

        # Create output directory
        FileOps.makeDir(salt_output)

        # Run UHBD at this specific salt concentration
        indivRun(filename,salt_output,pH_start,pH_stop,pH_interval,salt,
                 dielectric)
def main(): """ Experiments """ # retrieve arguments args = ArgParser.parse_args() subgraph_path = args.input graph_name = args.graph prod_data_path = args.output gprep_opt = args.gprep_opt # input file; directly from XBlock node_filename = subgraph_path + 'nodes_' + graph_name + '.csv' edge_filename = subgraph_path + 'edges_' + graph_name + '.csv' # generated file nodelist_RiWalk_filename = prod_data_path + 'nodes_' + graph_name + '.nodelist' edgelist_RiWalk_filename = prod_data_path + 'edges_' + graph_name + '.edgelist' features_filename = prod_data_path + 'features_' + graph_name + '.csv' feat_imp_filename = prod_data_path + 'feature_importance_' + graph_name + '.csv' # elliptic files elliptic_classes_filename = subgraph_path + '_elliptic_txs_classes.csv' elliptic_feature_filename = subgraph_path + '_elliptic_txs_features.csv' nodes = pd.read_csv(node_filename) edges = pd.read_csv(edge_filename) # read elliptic dataset files elliptic_classes = pd.read_csv(elliptic_classes_filename) elliptic_features = pd.read_csv(elliptic_feature_filename) tx_features = ["tx_feat_" + str(i) for i in range(2, 95)] agg_features = ["agg_feat_" + str(i) for i in range(1, 73)] elliptic_features.columns = ["address", "timestamp" ] + tx_features + agg_features elliptic_features = pd.merge(elliptic_features, elliptic_classes, left_on="address", right_on="txId", how='left') elliptic_features['label'] = elliptic_features['label'].apply( lambda x: '0' if x == "unknown" else x) # instantiate a feature engineering object bc_fe = bfe.BitcoinNodeEngFeatures(nodes, edges) # graph preparation tasks if gprep_opt == 'feature': generate_features_for_all_nodes(bc_fe, elliptic_features, prod_data_path, graph_name) elif gprep_opt == 'ri': # generate edge list for RiWalk generate_edgelist_for_RiWalk(bc_fe, edgelist_RiWalk_filename) # generate node list for RiWalk nodes_all_features_df = pd.read_csv(features_filename) feature_rank = pd.read_csv(feat_imp_filename)['feature'].tolist() # selected_features = ['node'] + feature_rank[0:10] selected_features = ['node'] + feature_rank # all the features generate_node_list_for_RiWalk(nodes_all_features_df, selected_features, nodelist_RiWalk_filename) else: raise ValueError("Incorrect value for graph preparation option!")
def main(): """ Experiments """ # path setting binary = True # binary or multi-class classification.ATTENTION: always use BINARY classification # retrieve arguments from argument parser args = ArgParser.parse_args() subgraph_path = args.input graph_filename = args.graph prod_data_path = args.output flag = args.flag clf_opt = args.clf_opt exp_id = args.exp_id stats_file = prod_data_path + 'stats_' + flag + '.csv' edges_filename = subgraph_path + 'edges_' + graph_filename + '.csv' nodes_filename = subgraph_path + 'nodes_' + graph_filename + '.csv' features_filename = prod_data_path + 'features_' + graph_filename + '.csv' feat_imp_filename = prod_data_path + 'feature_importance_' + graph_filename + '.csv' if clf_opt == 'fe': # ------------------ Feature Engineering ------------------ # read the input file and generating the features and the labels set print("Node Classification --- Feature Engineering ---") Bitcoin_EF_analysis_selected_nodes(prod_data_path, graph_filename, features_filename, stats_file, feat_imp_filename, 'FE', binary, rnd_seed, exp_id, extra_analysis=False) print("--- Node Classification Feature Engineering is done ---") # --------------------------------------------------------- elif clf_opt == 'concat': # ------------------ FE & Emb concatenating ------------------ print("Node classification: Concat. FE &" + flag + " embeddings.") emb_file = prod_data_path + 'emb_' + str( flag) + '_' + graph_filename + '.emb' fe_file = features_filename bitcoin_nd_clf_fe_emb_combined(emb_file, fe_file, stats_file, flag, binary, exp_id) # --------------------------------------------------------- else: # ------------------ RiWalk ------------------------------- print("Node classification: --- RiWalk - " + flag + "---") # set file names emb_filename = prod_data_path + 'emb_' + str( flag) + '_' + graph_filename + '.emb' nd.RiWalk_analysis_selected_nodes(prod_data_path, graph_filename, emb_filename, nodes_filename, stats_file, flag, binary, exp_id, extra_analysis=False) print("--- Classification RiWalk is done ---")
def main(args=None):
    config = configp.start()
    if args is None:
        args = parser.start()
    elif isinstance(args,basestring):
        args = parser.start(args)

    cedfile = args.ced
    stdfile = args.std
    plotFields = args.field
    multi = args.multi
    print_shapes = args.print_shapes
    print_global_atts = args.print_global_atts
    print_axis = args.print_axis
    print_list_synth = args.print_list_synth
    terrain = args.terrain
    slope = args.slope
    meteo = args.meteo
    valid = args.valid
    prof = args.prof
    nearest = args.nearest
    noplot = args.no_plot

    """ print syntheses available """
    if print_list_synth:
        synth_folder=fh.set_working_files(config=config)
        out=os.listdir(synth_folder)
        print "Synthesis directories available:"
        for f in out:
            print f
        usr_input = raw_input('\nIndicate directory: ')
        out=os.listdir(synth_folder+'/'+usr_input)
        print "\nSyntheses available:"
        out.sort()
        for f in out:
            if f[-3:]=='cdf':
                print f
        print '\n'
        sys.exit()

    """ retrieve synthesis and flight instances from AircraftAnalysis """
    SYNTH, FLIGHT, TERRAIN = fh.set_working_files(cedfile=cedfile,
                                                  stdfile=stdfile,
                                                  config=config)

    """ print shape of attribute arrays """
    if print_shapes:
        SYNTH.print_shapes()
        if not print_global_atts:
            sys.exit()

    """ print global attributes of cedric synthesis """
    if print_global_atts:
        SYNTH.print_global_atts()
        sys.exit()

    """ print axis values """
    if print_axis:
        for ax in print_axis:
            if ax.isupper():
                ax=ax.lower()
            SYNTH.print_axis(ax)
        sys.exit()

    """ print synthesis time """
    # print "Synthesis start time :%s" % SYNTH.start
    # print "Synthesis end time :%s\n" % SYNTH.end

    """ make synthesis plots """
    if plotFields:
        for f in plotFields:
            P=Plotter.plot_synth(SYNTH,FLIGHT,TERRAIN,
                                 var=f,
                                 wind=args.wind,
                                 panel=args.panel,
                                 slicem = args.slicem,
                                 slicez = args.slicez,
                                 slice = args.slice,
                                 # azimuth = args.azimuth,
                                 # distance = args.distance,
                                 zoomIn=args.zoomin,
                                 mask = args.mask,
                                 config=config)

    """ make terrain plots """
    if terrain or slope:
        # P[0] might produce an error if P is not a list;
        # check in plot_synth.cross_section
        Plotter.plot_terrain(P[0],
                             terrain=terrain,
                             slope=slope,
                             terrain_file=config['filepath_dtm'])

    """ make flight level meteo plot """
    if meteo:
        Plotter.plot_flight_meteo(SYNTH,FLIGHT)

    """ compare synth and flight level """
    if valid:
        out = Plotter.compare_synth_flight(SYNTH,FLIGHT,
                                           level=valid,
                                           zoomin=args.zoomin,
                                           noplot=noplot)
        # if 'wind_profiler' in config:
        #     case=int(cedfile[1:3])
        #     Plotter.compare_with_windprof(SYNTH,
        #                                   location=config['wind_profiler'],
        #                                   case=case)
        return out

    """ make profile from synthesis """
    if prof:
        if nearest is None:
            markers=['o','s','D','*']
            out = Plotter.make_synth_profile(SYNTH,coords=prof,
                                             markers=markers,
                                             noplot=noplot)
        else:
            ' (4.5km,n=12) or (7.0km,n=30) seem good choices '
            out = Plotter.make_synth_profile_withnearest(SYNTH,
                                                target_latlon=prof,
                                                max_dist=nearest[0][0],  # [km]
                                                n_neigh=nearest[0][1])
        try:
            ' if P exists '
            for i,p in enumerate(prof):
                lat,lon = p
                P.haxis.scatter(lon,lat,
                                s=config['sounding_size'],
                                c=config['sounding_color'],
                                marker=config['sounding_marker'],
                                lw=3)
        except UnboundLocalError:
            ' if P does not exist just pass '
            pass
        return out

    # if turbulence:
    #     Plotter.print_covariance(SYNTH,FLIGHT)
    #     Plotter.print_correlation(SYNTH,FLIGHT)
    #     Plotter.plot_wind_comp_var(SYNTH,FLIGHT)
    #     Plotter.plot_tke(SYNTH,FLIGHT)
    #     Plotter.plot_vertical_heat_flux(SYNTH,FLIGHT)
    #     Plotter.plot_vertical_momentum_flux(SYNTH,FLIGHT,config['filepath_dtm'])
    #     Plotter.plot_turbulence_spectra(SYNTH,FLIGHT)

    if multi:
        plt.close('all')
        return P
    else:
        ''' use this one with ipython '''
        plt.show(block=False)
        ''' use this one with the shell '''
        # plt.show()
""" call main function """
if __name__ == "__main__":
    args = parser.start()
    p = main(args)
import ArgParser
import sys

Parser = ArgParser.ArgParser(sys.argv)
print(Parser.ParsedArgs)
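# ArgParser here is a local module, not the standard library's argparse; a
# minimal sketch consistent with the usage above. The ParsedArgs attribute
# name comes from the call site, but the "--key value" parsing rule is an
# assumption.
class ArgParserSketch:
    def __init__(self, argv):
        # collect "--key value" pairs from the raw argv list into a dict
        self.ParsedArgs = {}
        i = 1
        while i < len(argv) - 1:
            if argv[i].startswith('--'):
                self.ParsedArgs[argv[i][2:]] = argv[i + 1]
                i += 2
            else:
                i += 1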
def main(): """ instantiate a node2vec object """ print("Node2Vec main method.") start_time = time.time() args = ArgParser.parse_args() main_path = args.main_path graph_name = args.graph iter_num = args.iter num_walks = args.num_walks dim = args.dimensions walk_length = args.walk_length workers = args.workers window_size = args.window_size p = args.p q = args.q data_path = main_path + '/inputs/' edge_list_file_name = data_path + graph_name + '.edgelist' node_list_file_name = data_path + 'nodes_' + graph_name + '.csv' output_path = main_path + '/outputs/' stats_file = output_path + 'stats.csv' nx_g = nx.from_pandas_edgelist(pd.read_csv(edge_list_file_name), source='source', target='target', create_using=nx.DiGraph()) print("Graph info:", nx.info(nx_g)) # for edge in nx_g.edges(): # nx_g[edge[0]][edge[1]]['weight'] = 1 # nx_g = nx_g.to_undirected() print("\tInstantiate a node2vec object.") node2vec = Node2Vec(nx_g, dimensions=dim, walk_length=walk_length, num_walks=num_walks, workers=workers, p=p, q=q) print("\tFit node2vec.") model = node2vec.fit(window=window_size, sg=1, hs=0, min_count=1, iter=iter_num) # read node list print("\tExtract embeddings and labels for the anchor nodes.") nodes_df = pd.read_csv(node_list_file_name) # binary classification anchor nodes anchor_nodes_df = nodes_df.loc[nodes_df['is_anchor'] == 1, ['node', 'isp']] node_list = [str(node_id) for node_id in anchor_nodes_df['node'].tolist()] embeddings = [model.wv.get_vector(node) for node in node_list] labels = anchor_nodes_df['isp'].tolist() # classification print("\tApply classification.") rnd_seed = 42 binary = True X_train, X_test, y_train, y_test = NodeClassification.train_test_split( embeddings, labels, rnd_seed) NodeClassification.rf_lr_classification(X_train, X_test, y_train, y_test, stats_file, 'n2v', binary=binary, print_report=True) print("Total elapsed time:", str(time.time() - start_time))