def find_adjacencies_command(args):
    """
    Infer co-expression modules.

    Loads the expression matrix via ``_load_expression_matrix(args)`` and the
    transcription-factor names from ``args.tfs_fname.name``, runs GRNBoost2 on
    the client obtained from ``_prepare_client`` and writes the resulting table
    to ``args.output`` as tab-separated text without the index.

    The process exits with status 1 when the expression matrix has no columns.
    A warning is logged when fewer than 80% of the supplied transcription
    factors appear among the matrix columns.
    """
    LOGGER.info("Loading expression matrix.")
    ex_mtx = _load_expression_matrix(args)
    tf_names = load_tf_names(args.tfs_fname.name)

    if len(ex_mtx.columns) == 0:
        LOGGER.error(
            "The expression matrix supplied does not contain any genes. Make sure the extension of the file matches the format (tab separation for TSV and comma sepatration for CSV)."
        )
        sys.exit(1)

    # Coverage is the share of supplied TFs that have a column in the matrix.
    # Taking len() of the boolean ``isin`` mask would just return the number
    # of columns, which makes the ratio always 1 and the warning unreachable.
    supplied_tfs = set(tf_names)
    n_matching_tfs = len(supplied_tfs.intersection(ex_mtx.columns))
    # An empty TF list has no defined coverage, so skip the ratio entirely.
    if supplied_tfs and float(n_matching_tfs) / len(supplied_tfs) < 0.80:
        LOGGER.warning(
            "Expression data is available for less than 80% of the supplied transcription factors."
        )

    LOGGER.info("Inferring regulatory networks.")
    client, shutdown_callback = _prepare_client(args.client_or_address,
                                                num_workers=args.num_workers)
    try:
        network = grnboost2(expression_data=ex_mtx,
                            tf_names=tf_names,
                            verbose=True,
                            client_or_address=client)
    finally:
        # Always release the client, even when inference raises.
        shutdown_callback(False)

    LOGGER.info("Writing results to file.")
    network.to_csv(args.output, index=False, sep='\t')
def Create_Graph(idList, labelList, percentage, netthreshold):
    """Build an undirected gene graph from the first rows of a GRNBoost2 network.

    Args:
        idList, labelList, percentage: forwarded unchanged to
            ``load_gexpressions``.
        netthreshold: fraction of the inferred link table to keep; the number
            of rows kept is ``int(network.index.size * netthreshold)``.

    Returns:
        A ``networkx.Graph`` whose edges join the ``TF`` and ``target``
        columns and carry the ``importance`` column as an edge attribute.
    """
    # GRAPH CREATION
    dfz = load_gexpressions(idList, labelList, percentage)
    # Transpose the dataframe so that every gene ends up in a column.
    dfinvert = dfz.transpose()
    # Every column label (gene) is offered as a candidate TF.
    TF_names = list(dfinvert)

    # The context manager closes the in-process Dask client even when the
    # inference raises; previously each call left a client behind.
    with Client(processes=False) as client:
        network = grnboost2(expression_data=dfinvert,
                            tf_names=TF_names,
                            client_or_address=client)  # generate network

    # head() keeps the first rows of the link table — presumably the
    # highest-importance links; confirm the ordering grnboost2 returns.
    limit = network.index.size * netthreshold
    G = nx.from_pandas_edgelist(network.head(int(limit)),
                                'TF',
                                'target', ['importance'],
                                create_using=nx.Graph(directed=False))
    # NOTE(review): nx.info was removed in NetworkX 3.0 — confirm the pinned
    # NetworkX version before upgrading.
    print(nx.info(G))
    return G
def process(mtx_fname, tfs, net_fname, client):
    """Infer a network from an expression file and store it as CSV.

    ``mtx_fname`` is read as tab-separated text with its first column as the
    index and transposed before being handed to ``grnboost2`` together with
    ``tfs`` and ``client``. The resulting table is written to ``net_fname``
    without the index column.
    """
    expression = pd.read_csv(mtx_fname, sep='\t', index_col=0)
    inferred = grnboost2(
        expression_data=expression.T,
        tf_names=tfs,
        verbose=True,
        client_or_address=client,
    )
    inferred.to_csv(net_fname, index=False)
def generate_grnets(idList, labelList, percentage, netthreshold):
    """Infer a gene network, draw it and save the drawing as a PNG.

    Args:
        idList, labelList, percentage: forwarded unchanged to
            ``load_gexpressions``.
        netthreshold: fraction of the inferred link table to draw; the number
            of rows kept is ``int(network.index.size * netthreshold)``.

    Returns:
        Path of the saved figure:
        ``<cwd>/figures/network_<YYYY-mm-dd_HHMMSS>.png``.
    """
    dfz = load_gexpressions(idList, labelList, percentage)

    # generate network
    netdata = dfz.T  # rotate matrix
    network = grnboost2(expression_data=netdata,
                        tf_names=list(netdata))
    network.rename(columns={'importance': 'value'}, inplace=True)

    # Build the graph from the first rows of the link table.
    limit = network.index.size * netthreshold
    # NetworkX 2.0 renamed from_pandas_dataframe to from_pandas_edgelist;
    # prefer the new name and fall back so older installs keep working.
    build_graph = getattr(nx, 'from_pandas_edgelist', None)
    if build_graph is None:
        build_graph = nx.from_pandas_dataframe
    G = build_graph(network.head(int(limit)),
                    'TF',
                    'target',
                    edge_attr='value',
                    create_using=nx.Graph())
    # Read colour values off the edges themselves so that each value lines up
    # with the edge it belongs to (an undirected graph merges A-B and B-A
    # rows, so slicing the table by edge count does not align).
    edge_values = [data['value'] for _, _, data in G.edges(data=True)]

    pos = nx.nx_pydot.pydot_layout(G)

    # labels
    nx.draw_networkx_labels(G, pos, font_size=8, font_family='sans-serif')
    # nodes
    nx.draw_networkx_nodes(G, pos, node_size=200, node_color='r', alpha=0.3)
    # edges (the keyword was misspelled "dge_color", so no colours were mapped)
    nx.draw_networkx_edges(G,
                           pos,
                           edge_color=edge_values,
                           width=3.0,
                           edge_cmap=plt.cm.Blues,
                           alpha=0.3)
    plt.axis('off')

    st = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H%M%S')
    # os.path.join yields the same path as the old "\\figures\\" string on
    # Windows and a valid one elsewhere.
    fig_dir = os.path.join(os.getcwd(), "figures")
    os.makedirs(fig_dir, exist_ok=True)
    figpath = os.path.join(fig_dir, "network_" + st + ".png")

    # Save before show(): once the window from show() is closed the current
    # figure is gone and savefig would write an empty image.
    plt.savefig(figpath)
    plt.show()
    return figpath
#from pypanda import Panda #from pypanda import AnalyzePanda #from pypanda import Lioness #import pandas as pd # #p = Panda('ToyExpressionData.txt', 'ToyMotifData.txt', 'ToyPPIData.txt', remove_missing=True) #p.save_panda_results(file = 'Toy_Panda.pairs') #plot = AnalyzePanda(p) #plot.top_network_plot(top=100, file='top_100_genes.png') from arboretum.algo import grnboost2, genie3 from arboretum.utils import load_tf_names netdata = dfz.T # rotate matrix network = grnboost2(expression_data=netdata, tf_names=list(netdata)) # generate network ############################ PLOT 3D NETWORK ############################################ import networkx as nx import matplotlib.pyplot as plt # Build a dataframe with your connections df = pd.DataFrame({ 'from': ['A', 'B', 'C', 'A'], 'to': ['D', 'A', 'E', 'C'], 'value': [1, 10, 5, 5] }) df.rename(columns={'importance': 'value'}, inplace=True) df
def GeneRegulationNetwork(self, netthreshold, config, netconfig):
    """Infer a gene network from ``self.dfz`` and render it as a 3D plotly plot.

    Args:
        netthreshold: fraction of the inferred link table to keep; the number
            of rows kept is ``int(network.index.size * netthreshold)``.
        config: layout selector. ``1`` uses the Fruchterman-Reingold layout;
            any other value uses the circular layout.
        netconfig: algorithm selector. ``1`` runs ``grnboost2``; any other
            value runs ``genie3``.

    Returns:
        The value returned by ``plot(fig, output_type='div', ...)`` —
        presumably an HTML ``<div>`` string; confirm against the ``plot``
        imported by this module.

    Side effects:
        Writes ``3DNetworkx_.html`` through ``plotly.offline.plot`` with
        ``auto_open=True``.
    """
    # Transpose the dataframe to get the format expected for network inference
    dfT = self.dfz.transpose()
    # Get all the TF gene names (every column label is a candidate)
    tf_names = list(dfT)

    # Run the inference on an in-process Dask client. The context manager
    # closes it even on failure; previously every call left a client open.
    with Client(processes=False) as client:
        # network dataframe has columns --> TF, target gene, importance
        if netconfig == 1:
            network = grnboost2(expression_data=dfT,
                                tf_names=tf_names,
                                client_or_address=client)
            print("grnboost2")
        else:
            network = genie3(expression_data=dfT,
                             tf_names=tf_names,
                             client_or_address=client)

    # Keep only a fraction of the links so the drawn graph stays readable.
    limit = network.index.size * netthreshold
    G = nx.from_pandas_edgelist(network.head(int(limit)),
                                'TF',
                                'target', ['importance'],
                                create_using=nx.Graph(directed=False))

    # G.nodes() instead of G.node(): the ``node`` alias was removed in
    # NetworkX 2.4.
    V = list(G.nodes())  # list of gene nodes
    Edges = list(G.edges())

    # Compute only the layout that is needed; building every candidate up
    # front ran three layout computations per call.
    if config == 1:
        layt = nx.fruchterman_reingold_layout(G, dim=3)
    else:
        layt = nx.circular_layout(G, dim=3)

    # Look coordinates up by node so they cannot drift out of step with V.
    Xn = [layt[v][0] for v in V]  # x-coordinates of nodes
    Yn = [layt[v][1] for v in V]  # y-coordinates
    Zn = [layt[v][2] for v in V]  # z-coordinates

    # Each edge contributes its two end points followed by None, which makes
    # plotly break the line between consecutive edges.
    Xe = []
    Ye = []
    Ze = []
    for src, dst in Edges:
        Xe += [layt[src][0], layt[dst][0], None]  # x-coordinates of edge ends
        Ye += [layt[src][1], layt[dst][1], None]
        Ze += [layt[src][2], layt[dst][2], None]

    trace1 = Scatter3d(x=Xe,
                       y=Ye,
                       z=Ze,
                       mode='lines',
                       line=Line(color='rgb(125,125,125)', width=1),
                       hoverinfo='none')
    trace2 = Scatter3d(x=Xn,
                       y=Yn,
                       z=Zn,
                       mode='markers+text',
                       textposition='top center',
                       name='genes',
                       marker=Marker(symbol='circle',
                                     size=3,
                                     color='#6959CD',
                                     colorscale='Viridis',
                                     line=Line(color='rgb(50,50,50)', width=1)),
                       text=V,
                       hoverinfo='text')

    # Shared axis settings: hide background, lines, grid, ticks and title.
    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')

    fig = Figure(data=Data([trace1, trace2]),
                 layout=Layout(
                     title="Gene Regulatory Network",
                     width=1000,
                     height=1000,
                     showlegend=False,
                     scene=Scene(
                         xaxis=XAxis(axis),
                         yaxis=YAxis(axis),
                         zaxis=ZAxis(axis),
                     ),
                     margin=Margin(t=100),
                     hovermode='closest',
                     annotations=Annotations([
                         Annotation(showarrow=False,
                                    text="Khaos Research Group",
                                    xref='paper',
                                    yref='paper',
                                    x=0,
                                    y=0.1,
                                    xanchor='left',
                                    yanchor='bottom',
                                    font=Font(size=20))
                     ]),
                 ))

    plotly.offline.plot(fig, filename='3DNetworkx_.html', auto_open=True)
    script = plot(fig,
                  output_type='div',
                  include_plotlyjs=False,
                  show_link=True)
    return script