def _insert_submission(db, row):
    """Insert one row into ``submissions`` and return its generated id.

    Args:
        db: Object exposing ``execute`` (the module-level ``engine``).
        row: Mapping with the keys ``submission_name``,
            ``submission_type_fk``, ``resource_fk``, ``date_contributed``,
            ``processing_script`` and ``file_type_fk``.

    Returns:
        The ``lastrowid`` reported by the executed statement.
    """
    # Local import keeps this block self-contained.
    # NOTE(review): assumes ``engine`` is a SQLAlchemy engine -- confirm.
    from sqlalchemy import text

    # Bound parameters: the values come straight from the submitted form, so
    # they must never be spliced into the SQL text.
    result = db.execute(
        text(
            "INSERT INTO submissions (submission_name, submission_type_fk, "
            "resource_fk, date_contributed, processing_script, file_type_fk) "
            "VALUES (:submission_name, :submission_type_fk, :resource_fk, "
            ":date_contributed, :processing_script, :file_type_fk)"
        ),
        row,
    )
    return result.lastrowid


def _insert_interactions(db, interactions):
    """Insert every row of ``interactions`` into the ``interactions`` table.

    Args:
        db: Object exposing ``execute`` (the module-level ``engine``).
        interactions: DataFrame with the columns ``source_gene_fk``,
            ``target_gene_fk`` and ``submission_fk``.
    """
    # An empty table would otherwise yield "INSERT ... VALUES" with no tuples.
    if interactions.empty:
        return

    from sqlalchemy import text

    # Plain ints: DB drivers do not always know how to adapt numpy integers.
    rows = [
        {
            'source_gene_fk': int(source),
            'target_gene_fk': int(target),
            'submission_fk': int(sub_fk),
        }
        for source, target, sub_fk in zip(
            interactions['source_gene_fk'],
            interactions['target_gene_fk'],
            interactions['submission_fk'],
        )
    ]
    db.execute(
        text(
            "INSERT INTO interactions "
            "(source_gene_fk, target_gene_fk, submission_fk) "
            "VALUES (:source_gene_fk, :target_gene_fk, :submission_fk)"
        ),
        rows,
    )


def _strip_last_token(column, sep):
    """Drop the text after the last ``sep`` when every value contains ``sep``.

    Returns ``column`` unchanged when at least one value lacks ``sep``.
    """
    if column.str.contains(sep, regex=False, na=False).all():
        return column.str.rsplit(sep, n=1).str[0]
    return column


def _interaction_statistics(sig):
    """Compute the numbers stored in the ``statistics`` table.

    Args:
        sig: DataFrame whose column ``0`` holds the source term and whose
            column ``5`` holds the target term of each interaction.

    Returns:
        dict with ``interaction_num``, ``hub_terms``, ``target_terms``,
        ``unique_terms`` and ``avg_term`` (all ints).
    """
    # Select the two columns explicitly instead of dropping the others, so a
    # table carrying extra columns does not break the renaming below.
    pairs = sig[[0, 5]].copy()
    pairs.columns = ['ProteinA', 'ProteinB']

    # Counted before the suffixes are stripped.
    interaction_num = len(pairs)
    hub_terms = len(pairs['ProteinA'].unique())
    target_terms = len(pairs['ProteinB'].unique())

    # Strip a trailing '_'-token, then a trailing '-'-token, from the sources
    # and a trailing ','-token from the targets (each only when every value
    # of the column contains the separator).
    for sep in ('_', '-'):
        pairs['ProteinA'] = _strip_last_token(pairs['ProteinA'], sep)
    pairs['ProteinB'] = _strip_last_token(pairs['ProteinB'], ',')

    unique_terms = len(
        pd.concat([pairs['ProteinA'], pairs['ProteinB']], axis=0).unique())

    # Average number of targets per (stripped) source term; truncated to an
    # int because it is stored through an integer placeholder.
    per_source = pairs.groupby('ProteinA').size()
    avg_term = int(per_source.mean()) if len(per_source) else 0

    return {
        'interaction_num': interaction_num,
        'hub_terms': hub_terms,
        'target_terms': target_terms,
        'unique_terms': unique_terms,
        'avg_term': avg_term,
    }


def _insert_statistics(db, stats, submission_fk):
    """Insert one ``statistics`` row for the submission ``submission_fk``."""
    from sqlalchemy import text

    params = dict(stats, submission_fk=int(submission_fk))
    db.execute(
        text(
            "INSERT INTO statistics (interaction_num, hub_terms, "
            "target_terms, unique_terms, avg_term, submission_fk) "
            "VALUES (:interaction_num, :hub_terms, :target_terms, "
            ":unique_terms, :avg_term, :submission_fk)"
        ),
        params,
    )


def upload():
    """Handle a submission made through the 'Contribute' form.

    On POST the view checks the submitted credentials, records the
    submission in ``submissions``, converts the uploaded file (type '1' is
    processed as GMT, type '2' as SIG) into gene interactions, stores them in
    ``interactions`` and stores summary numbers in ``statistics``.

    Returns:
        The 'invalid_username.html' / 'invalid_password.html' template when a
        credential check fails, otherwise a redirect to the 'Contribute'
        view (also for non-POST requests).

    Raises:
        TypeError, ValueError: if ``input_interaction``, ``input_resource``
            or ``input_type`` is missing or not an integer.
    """
    if request.method != 'POST':
        return redirect(url_for('Contribute'))

    # NOTE(review): both checks compare against an empty literal, so every
    # non-empty value is rejected -- presumably the real credentials were
    # removed from this copy; confirm before relying on it.
    user_name = request.form.get('input_user')
    if user_name != '':
        return render_template('invalid_username.html')

    password = request.form.get('input_password')
    if password != '':
        return render_template('invalid_password.html')

    # Retrieve data submitted through the 'Contribute' form.
    submission = request.form.get('new_submission_name')
    now = datetime.datetime.now().date()
    notebook = request.form.get("notebook_link")
    int_type = request.form.get('input_interaction')
    organism = request.form.get('input_species')
    resource = request.form.get('input_resource')
    file_type = request.form.get('input_type')
    file = request.files['input_file']

    # Wrap the uploaded file object so it can be read as text.
    f = io.TextIOWrapper(file)

    # The submission row is written first: its id is the foreign key that
    # the interactions and statistics rows refer to.
    submission_fk = _insert_submission(engine, {
        'submission_name': submission,
        'submission_type_fk': int(int_type),
        'resource_fk': int(resource),
        'date_contributed': str(now),
        'processing_script': str(notebook),
        'file_type_fk': int(file_type),
    })

    if file_type == '1':
        # GMT file: convert to the SIG layout first.
        gmt_data = [line.strip().split('\t') for line in f]
        sig = Scripts.GMT_to_SIG(gmt_data, secure_filename(file.filename))
    elif file_type == '2':
        sig = pd.read_table(f, header=None)
        if len(sig.columns) == 1:
            # Not tab-separated: re-parse as space-separated.  The first
            # parse consumed the stream, so rewind before reading again.
            f.seek(0)
            sig = pd.read_table(
                f,
                header=None,
                names=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                sep=' ')
    else:
        # Unknown file type: only the submission row is recorded.
        return redirect(url_for('Contribute'))

    interactions = Scripts.SIG_to_Genes(sig, submission_fk, organism, engine)
    _insert_interactions(engine, interactions)
    _insert_statistics(engine, _interaction_statistics(sig), submission_fk)

    # Return to 'Contribute' page after making the submission.
    return redirect(url_for('Contribute'))
# Example #2
# 0
def upload():
	"""Handle a submission made through the 'Contribute' form.

	On POST the view checks the submitted credentials, records the
	submission in ``submissions``, converts the uploaded file (type '1' is
	processed as GMT, type '2' as SIG) into gene interactions and stores
	them in ``interactions``.

	Returns:
		An error message string when a credential check fails, otherwise a
		redirect to the 'Contribute' view (also for non-POST requests).

	Raises:
		TypeError, ValueError: if ``input_interaction`` or
			``input_resource`` is missing or not an integer.
	"""
	# Local import keeps this block self-contained.
	# NOTE(review): assumes ``engine`` is a SQLAlchemy engine -- confirm.
	from sqlalchemy import text

	if request.method != 'POST':
		return redirect(url_for('Contribute'))

	# NOTE(review): both checks compare against an empty literal, so every
	# non-empty value is rejected -- presumably the real credentials were
	# removed from this copy; confirm before relying on it.
	user_name = request.form.get('input_user')
	if user_name != '':
		return "Invalid username, please try again"

	password = request.form.get('input_password')
	if password != '':
		return "Invalid password, please try again"

	# Retrieve data submitted through the 'Contribute' form.
	submission = request.form.get('new_submission_name')
	int_type = request.form.get('input_interaction')
	organism = request.form.get('input_species')
	resource = request.form.get('input_resource')
	file_type = request.form.get('input_type')
	file = request.files['input_file']

	# Wrap the uploaded file object so it can be read as text.
	f = io.TextIOWrapper(file)

	# The submission row is written first: its id is the foreign key the
	# interaction rows refer to.  Values are bound as parameters because
	# they come straight from the submitted form.
	sub = engine.execute(
		text(
			"INSERT INTO submissions "
			"(submission_name, submission_type_fk, resource_fk) "
			"VALUES (:submission_name, :submission_type_fk, :resource_fk)"
		),
		{
			'submission_name': submission,
			'submission_type_fk': int(int_type),
			'resource_fk': int(resource),
		},
	)
	submission_fk = sub.lastrowid

	if file_type == '1':
		# GMT file: convert to the SIG layout first.
		gmt_data = [line.strip().split('\t') for line in f]
		sig = Scripts.GMT_to_SIG(gmt_data, secure_filename(file.filename))
	elif file_type == '2':
		sig = pd.read_table(f, header=None)
		if 5 not in sig.columns:
			# Not tab-separated: re-parse as space-separated.  The first
			# parse consumed the stream, so rewind before reading again.
			f.seek(0)
			sig = pd.read_table(
				f,
				header=None,
				names=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
				sep=' ')
	else:
		# Unknown file type: only the submission row is recorded.
		return redirect(url_for('Contribute'))

	interactions = Scripts.SIG_to_Genes(sig, submission_fk, organism, engine)

	# An empty table would otherwise yield "INSERT ... VALUES" with no tuples.
	if not interactions.empty:
		# Plain ints: DB drivers do not always adapt numpy integers.
		rows = [
			{
				'source_gene_fk': int(source),
				'target_gene_fk': int(target),
				'submission_fk': int(sub_fk),
			}
			for source, target, sub_fk in zip(
				interactions['source_gene_fk'],
				interactions['target_gene_fk'],
				interactions['submission_fk'],
			)
		]
		engine.execute(
			text(
				"INSERT INTO interactions "
				"(source_gene_fk, target_gene_fk, submission_fk) "
				"VALUES (:source_gene_fk, :target_gene_fk, :submission_fk)"
			),
			rows,
		)

	return redirect(url_for('Contribute'))