def _insert_interactions(interactions):
    """Insert every row of *interactions* into the ``interactions`` table.

    Args:
        interactions: DataFrame with ``source_gene_fk``, ``target_gene_fk``
            and ``submission_fk`` columns (the columns the INSERT writes).

    Nothing is executed for an empty frame, because an INSERT statement
    without any VALUES rows is not valid SQL.
    """
    # Local import so this block does not rely on the file's import header.
    # NOTE(review): assumes ``engine`` is a SQLAlchemy Engine -- confirm.
    from sqlalchemy import text as sql_text

    # Values are cast to plain ints: the foreign keys were previously
    # formatted unquoted into the SQL, and DB drivers may not accept
    # numpy integer scalars as bound parameters.
    rows = [
        {
            'source_gene_fk': int(source),
            'target_gene_fk': int(target),
            'submission_fk': int(submission),
        }
        for source, target, submission in zip(
            interactions['source_gene_fk'],
            interactions['target_gene_fk'],
            interactions['submission_fk'],
        )
    ]
    if not rows:
        return
    engine.execute(
        sql_text(
            "INSERT INTO interactions "
            "(source_gene_fk, target_gene_fk, submission_fk) "
            "VALUES (:source_gene_fk, :target_gene_fk, :submission_fk)"
        ),
        rows,
    )


def _strip_last_token(terms, separator):
    """Drop the text after the last *separator* if every term contains it.

    Args:
        terms: pandas Series of strings.
        separator: literal separator to look for.

    Returns:
        A Series with the trailing token removed from each term when all
        terms contain *separator*; otherwise *terms* unchanged.
    """
    if not terms.str.contains(separator, regex=False).all():
        return terms
    return terms.map(lambda term: term.rsplit(separator, 1)[0])


def _compute_statistics(sig):
    """Compute the 'Submissions' page statistics for a SIG-style table.

    Column ``0`` is used as the source term and column ``5`` as the target
    term.  The two columns are selected by label instead of dropping the
    others, so a table that carries an extra trailing column still works.

    Args:
        sig: DataFrame with integer column labels including ``0`` and ``5``.

    Returns:
        dict with integer values for ``interaction_num``, ``hub_terms``,
        ``target_terms``, ``unique_terms`` and ``avg_term``.
    """
    pairs = sig[[0, 5]].copy()
    pairs.columns = ['ProteinA', 'ProteinB']

    if pairs.empty:
        # No rows: every statistic is zero (the mean would otherwise be NaN,
        # which cannot be stored as an integer).
        return {
            'interaction_num': 0,
            'hub_terms': 0,
            'target_terms': 0,
            'unique_terms': 0,
            'avg_term': 0,
        }

    # First stat: Number of Total Interactions
    interaction_num = len(pairs)
    # Second stat: Number of Sources/Hubs (counted before suffix stripping)
    hub_terms = pairs['ProteinA'].nunique(dropna=False)
    # Third stat: Number of Targets (counted before suffix stripping)
    target_terms = pairs['ProteinB'].nunique(dropna=False)

    # Fourth stat: Total Number of Unique Terms.  Trailing tokens are removed
    # first so that source and target terms can coincide.
    # NOTE(review): the '_' and '-' checks are applied one after the other;
    # confirm that matches the intended nesting of the original conditions.
    pairs['ProteinA'] = _strip_last_token(pairs['ProteinA'], '_')
    pairs['ProteinA'] = _strip_last_token(pairs['ProteinA'], '-')
    pairs['ProteinB'] = _strip_last_token(pairs['ProteinB'], ',')
    unique_terms = pd.concat(
        [pairs['ProteinA'], pairs['ProteinB']], axis=0
    ).nunique(dropna=False)

    # Fifth stat: Avg. Interactions per Term (targets per stripped source).
    mean_targets = pairs.groupby('ProteinA')['ProteinB'].size().mean()
    # The stored value is truncated, as the previous '%d' formatting did.
    # NOTE(review): the old debug output rounded instead -- confirm which
    # one is wanted.
    avg_term = 0 if pd.isna(mean_targets) else int(mean_targets)

    return {
        'interaction_num': int(interaction_num),
        'hub_terms': int(hub_terms),
        'target_terms': int(target_terms),
        'unique_terms': int(unique_terms),
        'avg_term': avg_term,
    }


def _insert_statistics(stats, submission_fk):
    """Store one row of submission statistics in the ``statistics`` table.

    Args:
        stats: dict as returned by ``_compute_statistics``.
        submission_fk: id of the submission the statistics belong to.
    """
    # NOTE(review): assumes ``engine`` is a SQLAlchemy Engine -- confirm.
    from sqlalchemy import text as sql_text

    params = dict(stats)
    params['submission_fk'] = int(submission_fk)
    engine.execute(
        sql_text(
            "INSERT INTO statistics "
            "(interaction_num, hub_terms, target_terms, unique_terms, "
            "avg_term, submission_fk) "
            "VALUES (:interaction_num, :hub_terms, :target_terms, "
            ":unique_terms, :avg_term, :submission_fk)"
        ),
        params,
    )


def upload():
    """Handle the 'Contribute' form: store a submission and its data.

    On a POST request the username and password fields are checked, one row
    is inserted into ``submissions``, the uploaded file is parsed (GMT when
    ``input_type`` is ``'1'``, SIG when it is ``'2'``), and the resulting
    interactions and summary statistics are inserted.  All SQL uses bound
    parameters, because the values come from the request.

    Returns:
        The rendered ``invalid_username.html`` / ``invalid_password.html``
        template when a credential check fails, otherwise a redirect to the
        ``Contribute`` endpoint.

    Raises:
        ValueError, TypeError: if a numeric form field is missing or is not
            an integer.
    """
    # NOTE(review): assumes ``engine`` is a SQLAlchemy Engine -- confirm.
    from sqlalchemy import text as sql_text

    if request.method == 'POST':
        # Checks if username is correct
        user_name = request.form.get('input_user')
        if user_name != '':
            return render_template('invalid_username.html')

        # Checks if password is correct
        password = request.form.get('input_password')
        if password != '':
            return render_template('invalid_password.html')

        # Retrieve data submitted through the 'Contribute' form
        submission = request.form.get('new_submission_name')
        now = datetime.datetime.now().date()
        notebook = request.form.get("notebook_link")
        int_type = request.form.get('input_interaction')
        organism = request.form.get('input_species')
        resource = request.form.get('input_resource')
        file_type = request.form.get('input_type')
        uploaded = request.files['input_file']

        # Wrap the uploaded file so it can be read as text.
        text_stream = io.TextIOWrapper(uploaded)

        # The submission row must exist before the file is processed, so
        # that its id can be used as the foreign key of the interactions.
        result = engine.execute(
            sql_text(
                "INSERT INTO submissions "
                "(submission_name, submission_type_fk, resource_fk, "
                "date_contributed, processing_script, file_type_fk) "
                "VALUES (:submission_name, :submission_type_fk, "
                ":resource_fk, :date_contributed, :processing_script, "
                ":file_type_fk)"
            ),
            {
                'submission_name': submission,
                'submission_type_fk': int(int_type),
                'resource_fk': int(resource),
                'date_contributed': str(now),
                'processing_script': str(notebook),
                'file_type_fk': int(file_type),
            },
        )
        # Id of the row just inserted (i.e. the submission foreign key).
        submission_fk = result.lastrowid

        sig = None
        if file_type == '1':
            # GMT file: tab-separated lines, converted to SIG layout.
            gmt_data = [
                line.strip().split('\t') for line in text_stream.readlines()
            ]
            sig = Scripts.GMT_to_SIG(gmt_data,
                                     secure_filename(uploaded.filename))
        elif file_type == '2':
            # SIG file: try tab-separated first, then space-separated.  The
            # text is read once and re-parsed from memory; retrying on the
            # already-consumed stream would find no data.
            raw_text = text_stream.read()
            sig = pd.read_table(io.StringIO(raw_text), header=None)
            if len(sig.columns) == 1:
                sig = pd.read_table(
                    io.StringIO(raw_text),
                    header=None,
                    names=list(range(14)),
                    sep=' ')

        if sig is not None:
            interactions = Scripts.SIG_to_Genes(sig, submission_fk, organism,
                                                engine)
            _insert_interactions(interactions)
            _insert_statistics(_compute_statistics(sig), submission_fk)

    # Return to 'Contribute' page after making the submission
    return redirect(url_for('Contribute'))
def _insert_interactions(interactions):
    """Insert every row of *interactions* into the ``interactions`` table.

    Args:
        interactions: DataFrame with ``source_gene_fk``, ``target_gene_fk``
            and ``submission_fk`` columns (the columns the INSERT writes).

    Nothing is executed for an empty frame, because an INSERT statement
    without any VALUES rows is not valid SQL.
    """
    # Local import so this block does not rely on the file's import header.
    # NOTE(review): assumes ``engine`` is a SQLAlchemy Engine -- confirm.
    from sqlalchemy import text as sql_text

    # Values are cast to plain ints: the foreign keys were previously
    # formatted unquoted into the SQL, and DB drivers may not accept
    # numpy integer scalars as bound parameters.
    rows = [
        {
            'source_gene_fk': int(source),
            'target_gene_fk': int(target),
            'submission_fk': int(submission),
        }
        for source, target, submission in zip(
            interactions['source_gene_fk'],
            interactions['target_gene_fk'],
            interactions['submission_fk'],
        )
    ]
    if not rows:
        return
    engine.execute(
        sql_text(
            "INSERT INTO interactions "
            "(source_gene_fk, target_gene_fk, submission_fk) "
            "VALUES (:source_gene_fk, :target_gene_fk, :submission_fk)"
        ),
        rows,
    )


def upload():
    """Handle the 'Contribute' form: store a submission and its interactions.

    On a POST request the username and password fields are checked, one row
    is inserted into ``submissions``, and the uploaded file is parsed (GMT
    when ``input_type`` is ``'1'``, SIG when it is ``'2'``) and its
    interactions inserted.  All SQL uses bound parameters, because the
    values come from the request.

    NOTE(review): this file contains two definitions named ``upload``;
    confirm which one is meant to be live.

    Returns:
        A plain error string when a credential check fails, otherwise a
        redirect to the ``Contribute`` endpoint.

    Raises:
        ValueError, TypeError: if a numeric form field is missing or is not
            an integer.
    """
    # NOTE(review): assumes ``engine`` is a SQLAlchemy Engine -- confirm.
    from sqlalchemy import text as sql_text

    if request.method == 'POST':
        # Check if username is correct
        user_name = request.form.get('input_user')
        if user_name != '':
            return "Invalid username, please try again"

        # Check if password is correct
        password = request.form.get('input_password')
        if password != '':
            return "Invalid password, please try again"

        # Form fields used below.
        submission = request.form.get('new_submission_name')
        int_type = request.form.get('input_interaction')
        organism = request.form.get('input_species')
        resource = request.form.get('input_resource')
        file_type = request.form.get('input_type')
        uploaded = request.files['input_file']

        # Wrap the uploaded file so it can be read as text.
        text_stream = io.TextIOWrapper(uploaded)

        # Insert the submission first; its id is the foreign key that the
        # interaction rows reference.
        result = engine.execute(
            sql_text(
                "INSERT INTO submissions "
                "(submission_name, submission_type_fk, resource_fk) "
                "VALUES (:submission_name, :submission_type_fk, "
                ":resource_fk)"
            ),
            {
                'submission_name': submission,
                'submission_type_fk': int(int_type),
                'resource_fk': int(resource),
            },
        )
        submission_fk = result.lastrowid

        # TODO: dispatch on the interaction type as well, and decide where
        # uploaded files (and their converted counterparts) should be saved.
        sig = None
        if file_type == '1':
            # GMT file: tab-separated lines, converted to SIG layout.
            gmt_data = [
                line.strip().split('\t') for line in text_stream.readlines()
            ]
            sig = Scripts.GMT_to_SIG(gmt_data,
                                     secure_filename(uploaded.filename))
        elif file_type == '2':
            # SIG file: try tab-separated first, then space-separated.  The
            # text is read once and re-parsed from memory; retrying on the
            # already-consumed stream would find no data.
            raw_text = text_stream.read()
            sig = pd.read_table(io.StringIO(raw_text), header=None)
            if 5 not in sig.columns:
                sig = pd.read_table(
                    io.StringIO(raw_text),
                    header=None,
                    names=list(range(13)),
                    sep=' ')

        if sig is not None:
            interactions = Scripts.SIG_to_Genes(sig, submission_fk, organism,
                                                engine)
            _insert_interactions(interactions)

    return redirect(url_for('Contribute'))