Example #1
0
def main():
	"""Drive the whole run: parse arguments, report input sizes, annotate, then regress.

	Calls ``argument_parser`` for the seven input settings, prints the counts
	returned by ``file_info``, and hands over to ``annotation.main`` and
	``regression.main`` in that order.

	Returns:
		0 once both stages have returned.
	"""
	(
		vcf_path,
		bed_path,
		expression_matrix_path,
		covariate_numerical,
		covariate_categorical,
		covariate_file_path,
		output_dir,
	) = argument_parser()

	# Basic information about the input files, reported before any heavy work starts.
	input_summary = file_info(
		vcf_path,
		expression_matrix_path,
		covariate_numerical,
		covariate_categorical,
		covariate_file_path,
	)
	(
		vcf_sample,
		vcf_var,
		expression_sample,
		expression_gene,
		cov_sample,
		cov_num,
		cov_used_num,
	) = input_summary
	print("VCF file:", vcf_sample, "samples and", vcf_var, "variants")
	print("Expression matrix:", expression_sample, "samples and", expression_gene, "genes")
	print("Covariate file:", cov_sample, "samples and", cov_num, "covariates;", cov_used_num, "covariates used")

	# Annotation stage (per the original note: maps samples to genes and writes
	# the bin expression file consumed by the linear regression).
	annotation.main(vcf_path, bed_path, expression_matrix_path, output_dir)

	# Regression stage (per the original note: builds the linear model, runs the
	# t test and writes the output summary file).
	print('buiding regression model...')
	regression.main(output_dir, covariate_numerical, covariate_categorical, covariate_file_path)

	print('done')
	return 0
Example #2
0
def main(run_nums, data_dir, write_dir):
    """Create the filtered files and ground-truth files for each requested run.

    Args:
        run_nums: One string holding the run identifiers separated by
            whitespace, e.g. ``"1 2 3"``.
        data_dir: Forwarded unchanged to ``filters.main`` and ``regression.main``.
        write_dir: Forwarded unchanged to ``filters.main`` and ``regression.main``.
    """
    print('Setting up files for runs ' + str(run_nums))

    # split() without a separator collapses runs of whitespace and ignores
    # leading/trailing blanks, so input such as "1  2 " no longer produces
    # empty run identifiers (split(' ') would yield '' entries).
    for run_num in run_nums.split():

        print('Run: ' + str(run_num))

        # Set up filtered data files
        print('Creating filtered files...')
        filters.main(True, True, run_num, data_dir, write_dir)

        # Run RF regression on each run to obtain ground truth files
        print('Creating ground truth files...')
        regression.main(run_num, None, 'RF', data_dir, write_dir, 2)

        print('\n')

    print('Done')
Example #3
0
    def post(self):
        """Store every uploaded file, run the regression on it, then compute the next move.

        For each entry in ``self.request.files`` the first part's body is
        decoded as UTF-8 and passed to ``updateData``; the regression result
        for ``data/<name>`` is divided by ``MULTIPLICAND`` and recorded under
        that name. The collected values and user prefixes are finally handed
        to ``calculateNextMove``.
        """
        predictions = {}
        owners = []
        uploads = self.request.files
        for name in uploads:
            body_text = uploads[name][0]['body'].decode("utf-8")
            updateData(name, body_text)

            # Run the regression on the stored data and scale the result.
            raw_prediction = regression.main('data/' + name)
            predictions[name] = raw_prediction / MULTIPLICAND

            # Only the text before the first underscore is kept
            # (presumably the user id; verify against the upload naming).
            owners.append(name.split('_')[0])
        calculateNextMove(predictions, owners)
Example #4
0
    def post(self):
        """Process the uploaded files and trigger the next-move calculation.

        Each name in ``self.request.files`` has its first part decoded as
        UTF-8 and saved through ``updateData``. ``regression.main`` is then
        run on ``data/<name>`` and its result, divided by ``MULTIPLICAND``,
        is stored per name. ``calculateNextMove`` receives the per-file
        values together with the name prefixes collected along the way.
        """
        next_values, users = {}, []
        request_files = self.request.files
        for upload_name in request_files:
            first_part = request_files[upload_name][0]
            updateData(upload_name, first_part['body'].decode("utf-8"))

            # Regression output is scaled down by the shared MULTIPLICAND.
            data_path = 'data/' + upload_name
            next_values[upload_name] = regression.main(data_path) / MULTIPLICAND

            # Prefix before the first '_' (presumably the user; confirm with callers).
            prefix = upload_name.split('_')[0]
            users.append(prefix)
        calculateNextMove(next_values, users)
Example #5
0
def predict_regression():
    """Run the regression models described by the request JSON and return their outputs.

    Reads ``collection``, ``features``, ``target`` and ``inputType`` from the
    request's JSON body, passes them to ``regression.main`` and returns a JSON
    response with one list per model column plus the result's index under
    ``files``.
    """
    # One get_json() call per field, in the same order as the fields are listed.
    collection, features, target, inputType = (
        request.get_json()[field]
        for field in ('collection', 'features', 'target', 'inputType')
    )
    ml_result = regression.main(collection, features, target, inputType)

    model_columns = ("Linear_Regression", "RandomForest", "GradientBoosting")
    response = {column: ml_result[column].tolist() for column in model_columns}
    response["files"] = ml_result.index.tolist()
    return jsonify(response)
Example #6
0
def main():
    """Render the AlphaAI page: title, sidebar activity selector and the chosen view.

    The sidebar select box offers the fixed list of activities; exactly one
    branch below runs for the selected entry and delegates to that activity's
    module-level ``main``. The "Home" entry is rendered inline.
    """
    st.title("AlphaAI")

    # Sidebar: the order of this list is the order shown in the select box.
    activities = [
        "Home",
        "Dataset Explorer",
        "ML Classifiers",
        "ML Regression",
        "News Classification",
        "Text Summarizer",
        "Real World Data Distribution",
        "Vision API",
    ]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    # `choice` is a single value, so the branches are mutually exclusive.
    if choice == "Home":
        headline = 'Empowering companies to jumpstart AI and generate real-world value'
        tagline = 'Use exponential technologies to your advantage and lead your industry with confidence through innovation.'
        st.header(headline)
        st.subheader(tagline)

        banner = Image.open('images/img0.jpg')
        st.image(banner, use_column_width=True, caption='Data Mining')
    elif choice == "Dataset Explorer":
        st.subheader("Dataset Explorer")
        dataset_analysis.main()
    elif choice == "Real World Data Distribution":
        geo_climate.main()
    elif choice == "ML Regression":
        regression.main()
    elif choice == "ML Classifiers":
        classification.main()
    elif choice == "Vision API":
        vision_api.main()
    elif choice == "Text Summarizer":
        text_summ.main()
    elif choice == "News Classification":
        newsclass.main()
Example #7
0
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017, the cclib development team
#
# This file is part of cclib (http://cclib.github.io) and is distributed under
# the terms of the BSD 3-Clause License.

"""This script runs the regression framework in the cclib-data repostiory."""

from __future__ import print_function

import os
import sys


if __name__ == "__main__":

    # Assume the cclib-data repository is cloned in this directory.
    regression_dir = os.path.join("..", "data", "regression")
    sys.path.append(regression_dir)
    import regression

    opt_traceback = "--traceback" in sys.argv
    opt_status = "--status" in sys.argv

    # This can be used to limit the programs we want to run regressions for.
    which = [arg for arg in sys.argv[1:] if not arg in ["--status", "--traceback"]]

    regression.main(which, opt_traceback, opt_status, regression_dir)
Example #8
0
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 25 14:13:45 2019

@author: drape
"""

# Printed before the imports below run, so the statement order matters here.
# NOTE(review): the two prints look like import-time debugging traces - confirm
# they are still wanted.
print('this should print')
import bank_boost as bb
import bagging as bag
import random_forest as rf
import regression as reg

print('does this print?')

# Run each experiment's entry point, in the same order as they were imported.
for experiment in (bb, bag, rf, reg):
    experiment.main()
Example #9
0
def _regression_on_next_market_day(companySymbol, start_date, max_days=31):
    """Return the first usable ``regression.main`` result on or after *start_date*.

    A day is skipped when ``regression.main`` raises or when element 1 of its
    result (used by the caller as the truth value) is ``None``.

    Raises:
        ValueError: if none of the *max_days* consecutive days yields a result.
    """
    one_day = datetime.timedelta(days=1)
    candidate = start_date
    last_error = None
    for _ in range(max_days):
        try:
            regression_tuple = regression.main(companySymbol, candidate)
            if regression_tuple[1] is not None:
                return regression_tuple
        except Exception as err:
            # Best-effort retry, as before (the original notes attribute this
            # to closed markets). Exception instead of a bare except so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            last_error = err
        # timedelta arithmetic rolls over month/year ends, unlike
        # replace(day=day + 1), which raises ValueError on the last day.
        candidate = candidate + one_day
    raise ValueError(
        'No regression data for {} within {} days of {}'.format(
            companySymbol, max_days, start_date.isoformat())) from last_error


def main(companyName, companySymbol, pred_date, weights, companyDict=None):
    """Train (or reuse) the network and score its prediction for *pred_date*.

    Args:
        companyName: Company name, forwarded to ``SA.getSentiment``.
        companySymbol: Ticker symbol, forwarded to ``SA.getSentiment`` and
            ``regression.main``.
        pred_date: Date to predict for; must support ``timedelta`` subtraction
            and ``isoformat()``.
        weights: Previously trained synaptic weights. When empty
            (``len(weights) == 0``) the network is trained from scratch on
            twelve monthly examples from 2018.
        companyDict: Cache passed to ``SA.getSentiment``. Defaults to a new
            dict per call; it is replaced by a fresh cache whenever training
            runs.

    Returns:
        ``(hit, synaptic_weights, companyDict)`` where ``hit`` is 1 when the
        prediction lies strictly within 0.1 of the actual change, else 0.

    Raises:
        ValueError: if no regression data can be found for a training month.
    """
    # A `{}` default would be one dict shared by every call that omits the argument.
    if companyDict is None:
        companyDict = {}

    neural_network = NeuralNetwork()

    # len() rather than truthiness: weights may be an array, whose truth value
    # is ambiguous.
    if len(weights) == 0:
        # Twelve training examples (one per month of 2018), each with two
        # inputs (sentiment, regression value) and one truth value.
        training_examples = []
        training_examples_outputs = []
        # Fresh cache for training (per the original note: avoids slowdown
        # from Yahoo and Nasdaq by caching the results).
        companyDict = {}

        for month in range(1, 13):
            latestDate = datetime.datetime(2018, month, 2)
            if month == 12:
                endDate = datetime.datetime(2019, 1, 1)
            else:
                endDate = datetime.datetime(2018, month + 1, 1)

            sentiment_analysis_tuple = SA.getSentiment(companySymbol,
                                                       companyName, latestDate,
                                                       endDate, companyDict)
            companyDict = sentiment_analysis_tuple[2]

            regression_tuple = _regression_on_next_market_day(
                companySymbol, endDate)

            training_examples.append(
                [sentiment_analysis_tuple[0], regression_tuple[0]])
            training_examples_outputs.append(regression_tuple[1])  # truth value

        training_inputs = np.array(training_examples)
        # Column vector: one truth value per training example.
        training_outputs = np.array([training_examples_outputs]).T

        neural_network.train(training_inputs, training_outputs, 15000)
    else:
        neural_network.synaptic_weights = weights

    # Sentiment window is the two days before pred_date. Subtracting a
    # timedelta also works on the 1st and 2nd of a month, where
    # replace(day=day - 2) raised ValueError.
    pred_back = pred_date - datetime.timedelta(days=2)

    sentiment_analysis_tuple = SA.getSentiment(companySymbol, companyName,
                                               pred_back, pred_date,
                                               companyDict)
    regression_tuple = regression.main(companySymbol, pred_date)
    input_features = [sentiment_analysis_tuple[0], regression_tuple[0]]
    actual = regression_tuple[1]

    print("\n Considering New Situation: " + str(input_features))
    prediction = neural_network.think(np.array(input_features))
    print("Trial for day ", pred_date.isoformat())
    print("\t Prediction: ", prediction)
    print("\t Actual Change: ", actual)

    # Count the trial as a hit when the prediction is within +/-0.1 of the truth.
    if prediction < actual + .1 and prediction > actual - .1:
        return (1, neural_network.synaptic_weights, companyDict)
    return (0, neural_network.synaptic_weights, companyDict)
Example #10
0
'''
Created on 20 mrt. 2013

@author: Erik Vandeputte
'''
import kmeans_assign
import regression
import time

RESULTSFILE = "results.txt"

clusters = [100, 200, 250, 300, 400, 450, 500, 700]
start_time = time.time()
f = open(RESULTSFILE, 'a')
for num_clusters in clusters:
    print 'perform kmeans for %d clusters' % num_clusters
    #kmeans_full.main(num_clusters)
    #assign (hard and soft)
    kmeans_assign.main(True, num_clusters)
    kmeans_assign.main(False, num_clusters)
    #perform regression(hard and soft)
    mse_hard, alpha_hard = regression.main(True, num_clusters)
    mse_soft, alpha_soft = regression.main(False, num_clusters)
    f.write("%s\t%s\t%s\t%s\n" %
            (str(num_clusters), 'hard', str(mse_hard), str(alpha_hard)))
    f.write("%s\t%s\t%s\t%s\n" %
            (str(num_clusters), 'soft', str(mse_soft), str(alpha_soft)))
    f.flush()
f.write("running this script took %.2f seconds" % (time.time() - start_time))
f.close()
Example #11
0
def main():
    """Command-line entry point of the neural-network framework.

    Parses the command line, seeds ``random`` with ``--seed``, validates the
    option combinations, selects the activation function pair from ``util``
    and dispatches to ``regression.main`` or ``classification.main``.

    Exits with status 1 (after printing a message) when:
      * neither ``--create_nn`` nor ``--read_nn`` is given;
      * ``--save_nn`` or ``--create_nn`` is given without ``--train_filename``.
    """
    import argparse
    # sys.exit is used instead of the bare exit() helper: exit() is injected
    # by the site module and is missing under `python -S` or in frozen builds.
    import sys

    parser = argparse.ArgumentParser(description='Neural Network framework.')
    parser.add_argument(
        'action',
        choices=['regression', 'classification'],
        help='Choose mode either \'regression\' or \'classification\'.')

    parser.add_argument(
        'activation',
        choices=['sigmoid', 'relu', 'tanh'],
        help='Choose mode either \'sigmoid\' or \'relu\' or \'tanh\'.')

    parser.add_argument('--train_filename',
                        type=str,
                        help='Name of a file containing training data',
                        required=False)
    parser.add_argument('--test_filename',
                        type=str,
                        help='Name of a file containing testing data')
    parser.add_argument(
        '--create_nn',
        nargs='*',
        type=int,
        help=
        'When creating a nn from scratch; number of neurons for each layer',
        required=False)

    parser.add_argument('--save_nn',
                        type=str,
                        help='Name of a file to save trained model to.')
    parser.add_argument('--savefig_filename',
                        type=str,
                        help='Name of a file to save plot to.')

    parser.add_argument('-e',
                        '--number_of_epochs',
                        type=int,
                        help='Number of epochs (iterations) for the NN to run',
                        required=False,
                        default=10000)
    parser.add_argument('--read_nn',
                        type=str,
                        help='When reading existing nn from a file; filename')
    parser.add_argument(
        '-v',
        '--visualize_every',
        type=int,
        help='How ofter (every n iterations) print neuron\'s weights.',
        required=False)
    parser.add_argument('--l_rate',
                        type=float,
                        help='Learning rate',
                        required=False,
                        default=0.001)

    parser.add_argument('--seed',
                        type=int,
                        help='Random seed int',
                        required=False,
                        default=1)

    # --biases / --no_biases toggle the same destination; biases are on by default.
    parser.add_argument('--biases', dest='biases', action='store_true')
    parser.add_argument('--no_biases', dest='biases', action='store_false')
    parser.set_defaults(biases=True)

    args = parser.parse_args()

    # Seed the random number generator
    random.seed(args.seed)

    # A network has to come from somewhere: built from scratch or read from a file.
    if args.create_nn is None and args.read_nn is None:
        print('Either \'--create_nn\' or \'--read_nn\' has to be provided.')
        sys.exit(1)

    # Saving or creating a network only makes sense when training data is given.
    if args.train_filename is None and args.save_nn is not None:
        print(
            '\'--save_nn\' cannot be provided when \'--train_filename\' is not provided.'
        )
        sys.exit(1)

    if args.train_filename is None and args.create_nn is not None:
        print(
            '\'--create_nn\' cannot be provided when \'--train_filename\' is not provided.'
        )
        sys.exit(1)

    # Imports are kept per-branch so only the selected activation is loaded.
    if args.activation == 'sigmoid':
        from util import sigmoid, sigmoid_derivative
        activation_f, activation_f_derivative = sigmoid, sigmoid_derivative
    elif args.activation == 'relu':
        from util import reLu, reLu_derivative
        activation_f, activation_f_derivative = reLu, reLu_derivative
    elif args.activation == 'tanh':
        from util import tanh, tanh_derivative
        activation_f, activation_f_derivative = tanh, tanh_derivative
    else:
        # Defensive only: argparse `choices` already rejects other values.
        print(
            'Sorry, second positional argument has to be either \'sigmoid\' or \'relu\' or \'tanh\'.'
        )
        sys.exit(1)

    # The two modes take different extras: regression receives the plot file
    # name, classification receives the biases flag.
    if args.action == 'regression':
        import regression
        regression.main(args.train_filename, args.test_filename,
                        args.create_nn, args.save_nn, args.read_nn,
                        args.number_of_epochs, args.visualize_every,
                        args.l_rate, args.savefig_filename, activation_f,
                        activation_f_derivative)
    elif args.action == 'classification':
        import classification
        classification.main(args.train_filename, args.test_filename,
                            args.create_nn, args.save_nn, args.read_nn,
                            args.number_of_epochs, args.visualize_every,
                            args.l_rate, args.biases, activation_f,
                            activation_f_derivative)
    else:
        # Defensive only: argparse `choices` already rejects other values.
        print(
            'Sorry, first positional argument has to be either \'regression\' or \'classification\'.'
        )
        sys.exit(1)
Example #12
0
'''
Created on 20 mrt. 2013

@author: Erik Vandeputte
'''
import kmeans_assign
import regression
import time

RESULTSFILE = "results.txt"

clusters =  [100,200,250,300,400,450,500,700]
start_time = time.time()
f = open(RESULTSFILE,'a')
for num_clusters in clusters:
    print 'perform kmeans for %d clusters' %num_clusters
    #kmeans_full.main(num_clusters)
    #assign (hard and soft)
    kmeans_assign.main(True,num_clusters)
    kmeans_assign.main(False,num_clusters)
    #perform regression(hard and soft)
    mse_hard,alpha_hard = regression.main(True, num_clusters)
    mse_soft,alpha_soft = regression.main(False, num_clusters)
    f.write("%s\t%s\t%s\t%s\n" % (str(num_clusters),'hard',str(mse_hard),str(alpha_hard)))
    f.write("%s\t%s\t%s\t%s\n" % (str(num_clusters),'soft',str(mse_soft),str(alpha_soft)))
    f.flush()
f.write("running this script took %.2f seconds" % (time.time() - start_time))
f.close()