Example #1
def test_data(model, data_dir, device):
    correct = 0
    total = 0
    step = 0
    device = torch.device(
        "cuda" if torch.cuda.is_available() and device == 'cuda' else "cpu")
    model.to(device)
    train_datasets, trainloader, validloader, testloader = process_data(
        data_dir)
    with torch.no_grad():  # turn off gradient tracking to save computation and memory
        model.eval()
        for images, labels in testloader:
            step += 1
            images, labels = images.to(device), labels.to(device)
            outputs = model.forward(images)
            ps = torch.exp(outputs)  # convert log-softmax outputs to class probabilities for each image in the batch
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy = torch.mean(equals.type(torch.FloatTensor))
            print('Accuracy for batch', step,
                  ': {:.3f}%'.format(accuracy.item() * 100))

            correct += equals.sum().item()
            total += labels.size(0)
    print('Number of correctly classified images:', correct)
    print('Number of images in test set:', total)
    print('Accuracy on test set: {:.3f}%'.format(100 * correct / total))
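
The PyTorch examples on this page (this one and Examples 8 and 9) unpack train_datasets, trainloader, validloader and testloader from process_data(data_dir), but the helper itself is not shown. Below is a minimal sketch of one plausible implementation, assuming data_dir holds train/, valid/ and test/ subfolders in torchvision ImageFolder layout and that an ImageNet-pretrained backbone is used (hence the normalisation constants); the real helper may differ.

import torch
from torchvision import datasets, transforms


def process_data(data_dir):
    # ImageNet normalisation constants (assumption about the backbone)
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    train_tf = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])
    eval_tf = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])

    train_datasets = datasets.ImageFolder(data_dir + '/train', transform=train_tf)
    valid_datasets = datasets.ImageFolder(data_dir + '/valid', transform=eval_tf)
    test_datasets = datasets.ImageFolder(data_dir + '/test', transform=eval_tf)

    trainloader = torch.utils.data.DataLoader(train_datasets, batch_size=64, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_datasets, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_datasets, batch_size=64)
    return train_datasets, trainloader, validloader, testloader
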
Example #2
def main():
    try:
        learning_rate = float(sys.argv[1])
    except:
        learning_rate = config.learning_rate
    # number of hidden layers
    hidden = int(sys.argv[2])
    L = hidden + 2  # L is the total number of layers
    data_file = sys.argv[3]  # dataset filename

    # regularization parameter
    try:
        lambd = float(sys.argv[4])
    except:
        lambd = config.lambd
    # layers
    try:
        k = int(sys.argv[5])  # number of output nodes, i.e. nodes in the Lth layer
    except:
        k = config.k
    try:
        normal_file = sys.argv[6]  # normalized dataset filename
    except:
        normal_file = config.normal_file

    # the number of nodes per layer, excluding the bias unit;
    # this will be used to build the theta array
    nodes_per = []

    for layer in range(2, hidden + 2):
        print("Nodes in layer", layer, end=" : ")
        nodes_per.append(int(input()))

    m, dataset, statistics = helper.process_data(data_file, normal_file, k)
    print(dataset)
    input()  # pause so the printed dataset can be inspected
    nodes_per.append(k)
    nodes_per.insert(0, len(dataset[0][0]))  # the number of inputs
    L, nodes_per, inital_thetas = design_thetas(nodes_per)

    # final_thetas, theta_history, total_runs, final_rate, regular_param \
    # = learn_thetas(inital_thetas, dataset, learning_rate, lambd)
    theta_history = learn_thetas(inital_thetas, dataset, learning_rate, lambd)
    final_thetas = theta_history[-1]

    cont = True
    while cont:
        query_y(final_thetas)
        print("Calculate another (Y/n) : ", end="")
        ans, cont = str(input()), False
        if ans == "" or ans[0] == "" or ans[0] == "y":
            cont = True
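
design_thetas and learn_thetas are not shown in this example. As a hedged sketch of what design_thetas might return for the nodes_per list built above (an assumption, not the original code): one randomly initialised weight matrix per layer transition, each with an extra column for the bias unit, matching the call L, nodes_per, inital_thetas = design_thetas(nodes_per).

import numpy as np


def design_thetas_sketch(nodes_per):
    # hypothetical stand-in for design_thetas; epsilon is a common small init range
    epsilon = 0.12
    thetas = []
    for l in range(len(nodes_per) - 1):
        # theta for layer l has shape (units in layer l+1) x (units in layer l, plus bias)
        thetas.append(np.random.uniform(-epsilon, epsilon,
                                        (nodes_per[l + 1], nodes_per[l] + 1)))
    return len(nodes_per), nodes_per, thetas
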
Example #3
    def _text2vector(self, texts):
        '''Given a list of strings, convert them to a 3D numpy array in which
        each token in the text is represented as a vector from
        self.word2vec_model.

        See semeval.helper.process_data for more details.

        list of strings -> 3D numpy array of shape (len(texts),
        max_number_tokens, self.word2vec_model.vector_size)
        '''

        if self._max_length == 0:
            raise Exception('Your model requires training first')

        return helper.process_data(texts, self._word2vec_model, self._max_length)
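
The docstring above pins down the output shape but delegates the work to semeval.helper.process_data, which is not shown here. A hedged sketch of that conversion follows; the whitespace tokeniser and zero padding are assumptions, and any object supporting membership tests and word lookup (such as a gensim KeyedVectors instance) serves as the word2vec model.

import numpy as np


def texts_to_vectors(texts, word2vec_model, max_length):
    # hypothetical stand-in for semeval.helper.process_data
    out = np.zeros((len(texts), max_length, word2vec_model.vector_size),
                   dtype=np.float32)
    for i, text in enumerate(texts):
        tokens = text.split()[:max_length]  # naive whitespace tokenisation (assumption)
        for j, token in enumerate(tokens):
            if token in word2vec_model:
                out[i, j] = word2vec_model[token]
    return out
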
Example #4
def run(options):

    # Checking if all required options are specified
    if options.ensembl is None:
        print('\nError: no Ensembl release specified. Use option -h to get help!\n')
        quit()

    try:
        options.ensembl = int(options.ensembl)
    except ValueError:
        print('\nError: Ensembl release specified is not an integer. Use option -h to get help!\n')
        quit()

    if options.output is None:
        print('\nError: no output file name specified. Use option -h to get help!\n')
        quit()

    # Must use Ensembl release >= 70 or v65
    if not (options.ensembl >= 70 or options.ensembl == 65):
        print('\nError: This version works with Ensembl v65 or >= v70.\n')
        quit()

    # Genome build
    genome_build = 'GRCh37' if options.ensembl <= 75 else 'GRCh38'

    # Print info
    print('Ensembl version:  ' + str(options.ensembl))
    print('Reference genome: ' + genome_build)

    # Creating compressed output file
    Nretrieved = helper.process_data(options, genome_build)
    print('\nA total of ' + str(Nretrieved) + ' transcripts have been included\n')

    # Indexing output file with Tabix
    helper.indexFile(options)

    # Removing uncompressed output file
    os.remove(options.output)

    # Printing out summary information
    print()
    print('---------------------')
    print('Output files created:')
    print('---------------------')
    print(options.output + '.gz (transcript database)')
    print(options.output + '.gz.tbi (index file)')
    print(options.output + '.txt (list of transcripts)')
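
helper.indexFile is not shown either. Tab-delimited genomic databases like this one are typically bgzip-compressed and indexed with Tabix; the sketch below uses pysam's wrappers and is illustrative only, with placeholder column positions rather than the tool's real layout.

import pysam


def index_file_sketch(output_name):
    # bgzip-compress the plain-text database, then build a Tabix index on it;
    # seq_col/start_col/end_col are 0-based placeholder column positions
    pysam.tabix_compress(output_name, output_name + '.gz', force=True)
    pysam.tabix_index(output_name + '.gz', seq_col=1, start_col=2, end_col=3,
                      force=True)
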
Example #5
def main():
	filename = rd[1]  # rd appears to be the command-line argument list (e.g. sys.argv)
	# number of features used to represent each "thing";
	# more is better, but more expensive
	n = int(rd[2])
	learning_rate = float(rd[3]) 
	# regularized
	try:
		regular = float(rd[4])
	except:
		# regular = False
		regular = config.regular

	dataset = np.matrix(helper.process_data(filename))
	
	# this contains the i,j and value of the filled elements
	global fill_array
	fill_array = getFilled(dataset)
	n_m, n_u = dataset.shape
	# learn the defining features as well as preferences for users
	final_xi_s, final_thetas = grad_descent(n_m, n_u, n, learning_rate, regular)
	print("FINAL THETAS", final_thetas, "FINAL XIS", final_xi_s, sep = "\n")
	# fill in the approx blanks now that params have been learnt
	new_dataset = fill_data(dataset, final_xi_s, final_thetas)
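
grad_descent, getFilled and fill_data are not shown. The fill-in step of this low-rank model reduces to a matrix product; here is a hedged sketch of it, assuming missing ratings are stored as NaN (which the snippet does not state).

import numpy as np


def fill_data_sketch(dataset, xi_s, thetas):
    # predicted rating matrix: item features (n_m x n) times user preferences (n_u x n), transposed
    predictions = np.asarray(xi_s) @ np.asarray(thetas).T
    filled = np.asarray(dataset, dtype=float).copy()
    missing = np.isnan(filled)  # assumption: missing entries are NaN
    filled[missing] = predictions[missing]
    return filled
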
Example #6

def recall(labels, pred):
    recall = recall_score(labels, pred)
    return recall


def extract_weighted_columns(data):
    data = data[:, [1, 2, 4, 6, 8, 9, 10, 15, 17, 18, 21, 25, 27, 28, 29]]
    return data


if __name__ == "__main__":

    training_data = genfromtxt('training.csv', dtype=str, delimiter=',')
    testing_data = genfromtxt('testing.csv', dtype=str, delimiter=',')

    train_data, train_labels, train_weights = helper.process_data(
        training_data)
    test_data, test_labels, test_weights = helper.process_data(testing_data)

    train_data_weighted = helper.normalize_data(
        helper.replace_missing_values(extract_weighted_columns(train_data)))
    test_data_weighted = helper.normalize_data(
        helper.replace_missing_values(extract_weighted_columns(test_data)))

    models.run_lr(train_data_weighted, train_labels, test_data_weighted,
                  test_labels, test_weights)
    models.run_gnb(train_data_weighted, train_labels, test_data_weighted,
                   test_labels, test_weights)
    models.run_gradient_boosting(train_data_weighted, train_labels,
                                 test_data_weighted, test_labels, test_weights)
    models.run_decision_tree(train_data_weighted, train_labels,
                             test_data_weighted, test_labels, test_weights)
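
helper.replace_missing_values and helper.normalize_data are not shown. A hedged sketch of what such helpers commonly do follows; the -999.0 sentinel for missing values is an assumption about this CSV, not something the snippet states.

import numpy as np


def replace_missing_values(data, sentinel=-999.0):
    data = np.asarray(data, dtype=float).copy()
    for col in range(data.shape[1]):
        column = data[:, col]
        mask = column == sentinel
        if mask.any() and not mask.all():
            column[mask] = column[~mask].mean()  # impute with the column mean
    return data


def normalize_data(data):
    mean = data.mean(axis=0)
    std = data.std(axis=0)
    std[std == 0] = 1.0  # guard against constant columns
    return (data - mean) / std
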
Example #7
import nltk
from helper import process_data

train = process_data('train')

x = train['question'][0]
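
The snippet stops after grabbing the first question. Given the nltk import, one plausible (assumed) next step is tokenisation:

tokens = nltk.word_tokenize(x)  # requires the nltk 'punkt' tokenizer data
print(tokens)
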
    parser.add_argument("-e"
                        "--epochs",
                        dest='epochs',
                        action="store",
                        type=int,
                        default=5,
                        help="number of epochs for training, default 5")
    parser.add_argument("-d"
                        "--device",
                        dest='device',
                        action="store",
                        type=str,
                        default="cuda",
                        help="device for training,default cuda")
    args = parser.parse_args()
    train_datasets, trainloader, validloader, testloader = process_data(
        args.data_dir)

    model = build_model(args.arch, args.hidden_units, args.output_units)
    running_losses, running_valid_losses, trained_model = train_model(
        args.data_dir, model, args.learning_rate, args.epochs, args.device)
    test_data(trained_model, args.data_dir, args.device)
    trained_model.class_to_idx = train_datasets.class_to_idx
    #device = torch.to("cuda" if torch.cuda.is_available() and args.device == 'cuda' else "cpu")
    #trained_model.to(device)
    torch.save(
        {
            'arch': args.arch,
            'hidden_units': args.hidden_units,
            'output_units': args.output_units,
            'state_dict': trained_model.state_dict(),
            'class_to_idx': trained_model.class_to_idx
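
The torch.save call is cut off here, so the checkpoint filename is not visible. For completeness, a hedged sketch of loading such a checkpoint back, reusing build_model from this example; 'checkpoint.pth' is purely an illustrative placeholder.

import torch


def load_checkpoint(path='checkpoint.pth'):
    checkpoint = torch.load(path, map_location='cpu')
    model = build_model(checkpoint['arch'], checkpoint['hidden_units'],
                        checkpoint['output_units'])
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model
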
Example #9
def train_model(data_dir, model, learning_rate, epochs, device):
    device = torch.device(
        "cuda" if torch.cuda.is_available() and device == 'cuda' else "cpu")
    model.to(device)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    train_datasets, trainloader, validloader, testloader = process_data(
        data_dir)
    running_losses, running_valid_losses = [], []
    for e in range(epochs):
        running_loss = 0
        corrects = 0
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(
                device)  # move the batch to the selected device

            #clean up accumulated gradients before training the new batch
            optimizer.zero_grad()

            #Forward and backward pass
            log_ps = model.forward(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()  # add this batch's loss to the running total

        # for/else: this else block runs once the epoch's training batches finish;
        # use the validation dataset to compare training and validation loss
        else:
            running_valid_loss = 0
            running_accuracy = 0
            with torch.no_grad():
                model.eval()  #set model to evaluation mode to stop dropout
                for images, labels in validloader:
                    images, labels = images.to(device), labels.to(device)
                    log_ps = model.forward(images)
                    valid_loss = criterion(log_ps, labels)
                    running_valid_loss += valid_loss.item()

                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy = torch.mean(equals.type(torch.FloatTensor))
                    running_accuracy += accuracy.item()

            model.train()

            print(
                "Epoch:",
                e + 1,
                "Training loss:{:.3f}..".format(running_loss /
                                                len(trainloader)),
                "Validation loss:{:.3f}..".format(running_valid_loss /
                                                  len(validloader)),
                #"Running Accuracy:{:.3f}..".format(running_accuracy),
                "Validation accuracy:{:.3f}%..".format(running_accuracy * 100 /
                                                       len(validloader)))

            running_losses.append(running_loss / len(trainloader))
            running_valid_losses.append(running_valid_loss / len(validloader))

    print('Train Losses:', running_losses, 'Validation Losses:',
          running_valid_losses)

    return running_losses, running_valid_losses, model
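
A possible follow-up, not part of the original example: plotting the two loss curves returned by train_model (matplotlib assumed available).

import matplotlib.pyplot as plt

# running_losses and running_valid_losses as returned by train_model above
plt.plot(running_losses, label='training loss')
plt.plot(running_valid_losses, label='validation loss')
plt.xlabel('epoch')
plt.legend()
plt.show()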