def pooled_processing_fastq(fastqfile_options):
    """Translate/search one fastq input against the HMM and classify hits.

    Worker function for a multiprocessing Pool: it takes a single
    (fastqfile, options) tuple because Pool workers receive one pickled
    argument, and it builds its own logger because logger objects cannot
    be pickled and sent to the workers.

    Args:
        fastqfile_options: 2-tuple ``(fastqfile, options)`` — path to the
            input fastq file and the parsed options namespace.

    Returns:
        Tuple ``(fastq file basename, path to the positives/hit file)``.

    Raises:
        KeyboardInterruptError: re-raised in place of KeyboardInterrupt so
            the Pool can propagate the interruption (project-defined type).
    """
    # Cannot send logger object to functions run in a multiprocessing Pool.
    logger = logging.getLogger(__name__ + '.pooled_processing_fastq')
    # Removed leftover debug statement: print(logger.handlers)
    try:
        fastqfile, options = fastqfile_options
        modelName = path.splitext(path.basename(options.hmm_model))[0]
        fastqBaseName = path.splitext(path.basename(fastqfile))[0]
        fastqFilesBaseName = path.basename(fastqfile)
        fastafile = '%s/%s.fasta' % (path.abspath(options.tmp_dir),
                                     fastqBaseName)
        hmmOut = '%s/%s-%s-hmmsearched.out' % (
            path.abspath(options.hmm_out_dir), fastqBaseName, modelName)
        hitFile = '%s/%s-positives.out' % (
            path.abspath(options.tmp_dir), fastqBaseName)
        logger.info('Converting fastq to fasta')
        if options.rerun:
            # Re-run mode: reuse previously stored peptides when present,
            # otherwise fall back to the stored fasta and search again.
            peptideFile = '%s/%s-amino.fasta' % (options.amino_dir,
                                                 fastqBaseName)
            if path.isfile(peptideFile):
                logger.info('Performing hmmsearch')
                utils.perform_hmmsearch(peptideFile, options.hmm_model,
                                        hmmOut, options)
            else:
                fastafile = '%s/%s.fasta' % (options.fasta_dir, fastqBaseName)
                logger.info('Translating and searching')
                utils.translate_and_search(fastafile, options.hmm_model,
                                           hmmOut, options)
        elif options.store_peptides:
            logger.info('Translating')
            peptideFile = '%s/%s-amino.fasta' % (
                path.abspath(options.tmp_dir), fastqBaseName)
            # This branch is only reached when options.rerun is falsy, so the
            # original `if not options.rerun:` guard was always true — the
            # six-frame translation runs unconditionally here.
            frame = '6'
            utils.translate_sequence(fastafile, peptideFile, options, frame)
            logger.info('Performing hmmsearch')
            utils.perform_hmmsearch(peptideFile, options.hmm_model, hmmOut,
                                    options)
        else:
            logger.info('Translating and searching')
            utils.translate_and_search(fastafile, options.hmm_model, hmmOut,
                                       options)
        logger.info('Start to classify')
        utils.classifier(hmmOut, hitFile, options)
        logger.info('Translating, searching, and classification done')
        # Removed unused local `fastqPath` (was computed but never read).
        return fastqFilesBaseName, hitFile
    except KeyboardInterrupt:
        raise KeyboardInterruptError()
num_workers=1) # test for accuracy true = 0 true_adv = 0 total = len(dataset) for image, label in dataloader: image = image.cuda() label = label.cuda() output, adv_out = add_adv(classifier, image, label, 'fgsm', default=True) output_class = classifier(output) adv_output_class = classifier(adv_out) def_out, _, _, _ = model(adv_out) cleaned_class = classifier(def_out) true_class = torch.argmax(output_class, 1) adv_class = torch.argmax(adv_output_class, 1) adv_clean_class = torch.argmax(cleaned_class, 1) print(f'attack method fgsm') print(f'actual class {true_class}') print(f'actual advclass {adv_class}') print(f'adversarial class {adv_clean_class}') true += torch.sum(torch.eq(true_class, adv_clean_class)) true_adv += torch.sum(torch.eq(true_class, adv_class))
import pandas as pd
from utils.scraper import *
from utils.classifier import *
from utils.db_controller import *
from utils.json_reader import *

# Pipeline: scrape profiles -> read the scraped JSON -> classify comments
# -> persist posts/comments to the database -> delete the consumed JSON.

print('Process started... "Scraping profiles"')
scraper = Scraper()
print('Process ended... "Scraping profiles"')

print('Process started... "Reading JSON files"')
reader = json_reader()
posts, comments = reader.get_df()
print('Process ended... "Reading JSON FILES"')

print('Process started... "Classifying comments"')
# Classify a copy so the raw frame is not mutated by the classifier.
classif = classifier(comments.copy())
comments = classif.get_df()
print('Process ended... "Classifying comments"')

print('Process started... "Inserting into DB"')
db = db_controller(posts, comments)
db.insert_into()
print('Process ended... "Inserting into DB"')

# The JSON files have been consumed; remove them.
print('Cleaning the JSON files.')
reader.clean_files()
test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

# Data loaders — only the training set is shuffled.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

# Keep the datasets together so the class-to-index mapping can be
# recovered after training.
image_datasets = dict(train_data=train_data,
                      test_data=test_data,
                      valid_data=valid_data)

# Build the model from the chosen architecture / hyper-parameters.
model = u.classifier(arch, hidden_units, dropout, categories)

# Negative log-likelihood loss (model output is expected to be
# log-probabilities); optimise only the classifier head's parameters.
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.0008)

# Train with periodic validation.
u.train(model, epochs, train_loader, valid_loader, optimizer, criterion, device)

# Attach the class mapping and persist a checkpoint.
model.class_to_idx = image_datasets['train_data'].class_to_idx
u.save_cp(arch, model, hidden_units, dropout, epochs, optimizer, categories, save_dir)
def parse_fasta_input(options, Results, logger):
    """Search each FASTA input against the HMM and collect/classify hits.

    For every file in ``options.infiles``: run hmmsearch (directly for
    protein input, via six-frame translation otherwise), classify the
    hits, retrieve the matching sequences, and — for nucleotide input —
    elongate the hit regions and predict ORFs with prodigal or ORFfinder.
    Hit/ORF statistics are accumulated on the passed-in ``Results`` object.

    NOTE(review): only the LAST iteration's ``orfFile`` path is returned —
    confirm callers expect this.
    """
    modelName = path.splitext(path.basename(options.hmm_model))[0]
    logger.info('Parsing FASTA files')
    frame = '6'  # six-frame translation
    for fastafile in options.infiles:
        fastaBaseName = path.splitext(path.basename(fastafile))[0]
        # Per-input output paths.
        hmmOut = '%s/%s-%s-hmmsearched.out' % (path.abspath(
            options.hmm_out_dir), fastaBaseName, modelName)
        fastaOut = '%s/%s-%s-filtered.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        aminoOut = '%s/%s-%s-filtered-peptides.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        orfFile = '%s/%s-%s-predicted-orfs.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        # NOTE(review): orfAminoFile is computed but never used below.
        orfAminoFile = '%s/%s-%s-predicted-orfs-amino.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        hitFile = '%s/%s-positives.out' % (path.abspath(
            options.tmp_dir), fastaBaseName)
        elongated_fasta = '%s/%s-gene-elongated.fasta' % (path.abspath(
            options.tmp_dir), fastaBaseName)
        if options.protein:
            # Protein input: search directly, no translation required.
            utils.perform_hmmsearch(fastafile, options.hmm_model, hmmOut,
                                    options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
        else:
            if options.store_peptides:
                # Keep the translated peptides on disk for later reuse.
                peptideFile = '%s/%s-amino.fasta' % (path.abspath(
                    options.tmp_dir), fastaBaseName)
                utils.translate_sequence(fastafile, peptideFile, options,
                                         frame)
                logger.info('Performing hmmsearch')
                utils.perform_hmmsearch(peptideFile, options.hmm_model,
                                        hmmOut, options)
            else:
                # Translate on the fly and pipe straight into hmmsearch.
                utils.translate_and_search(fastafile, options.hmm_model,
                                           hmmOut, options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
            if not path.isfile(fastaOut):
                logger.critical('Could not find file %s', fastaOut)
                # exit()
            else:
                # Elongate hit regions so ORF prediction has flanking context.
                utils.retrieve_surroundings(hitDict, fastafile,
                                            elongated_fasta)
                if path.isfile(elongated_fasta):
                    if not options.orf_finder:
                        # Default ORF predictor: prodigal.
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_prodigal(elongated_fasta,
                                              options.tmp_dir, tmpORFfile,
                                              options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                    else:
                        # Alternative ORF predictor: ORFfinder.
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_orfFinder(elongated_fasta,
                                               options.tmp_dir, tmpORFfile,
                                               options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                    if options.store_peptides:
                        options.retrieve_whole = False
                        utils.retrieve_peptides(hitDict, peptideFile,
                                                aminoOut, options)
                    else:
                        tmpFastaOut = utils.make_fasta_unique(fastaOut,
                                                              options)
                        utils.retrieve_predicted_genes_as_amino(
                            options, tmpFastaOut, aminoOut, frame='6')
        # Accumulate statistics for this input file.
        Results.count_hits(hitFile)
        if path.isfile(orfFile):
            if not options.orf_finder:
                Results.count_orfs_genomes(orfFile)
            else:
                Results.predictedOrfs = Results.count_contigs(orfFile)
    return orfFile
#加载数据集 trainData = h5_dataset(train_list) valData = h5_dataset(val_list) trainDataLoader = DataLoader(trainData, batch_size=bs, shuffle=True, num_workers=4) valDataLoader = DataLoader(valData, batch_size=bs, shuffle=False, num_workers=4) #构建网络 feature_dim = trainData.feature.size(1) net = classifier(feature_dim, numclasses) optimizer = optim.Adam(net.parameters(), lr=lr) criterion = nn.CrossEntropyLoss() if use_gpu: net.cuda() criterion = criterion.cuda() if __name__ == '__main__': #训练测试 for e in range(epoch): net.train() train_loss = 0 train_correct = 0 for i, (batchData, batchLabel) in enumerate(trainDataLoader): if use_gpu: batchData, batchLabel = batchData.cuda(), batchLabel.cuda()
# Evaluate the defence model against each attack method: for every attack,
# compare the classifier's prediction on the clean input with its prediction
# on the defence-cleaned adversarial input, and record the accuracy.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True,
                                         num_workers=1)
# adversarial methods (one accuracy slot per attack)
adv_accuracy = {'fgsm': 0, 'r-fgsm': 0, 'cw': 0,
                'mi-fgsm': 0, 'pgd': 0, 'single': 0}
# Hoisted loop invariant: the dataset size is the same for every attack.
total = len(dataset)
# test for accuracy
for adv in adv_accuracy:
    true = 0
    for image, label in dataloader:
        image = image.cuda()
        label = label.cuda()
        # get model output on clean input and the crafted adversarial input
        output, adv_out = add_adv(classifier, image, label, adv, default=True)
        output_class = classifier(output)
        # the defence model reconstructs/cleans the adversarial input
        def_out, _, _, _ = model(adv_out)
        adv_out_class = classifier(def_out)
        # get model predicted class
        true_class = torch.argmax(output_class, 1)
        adversarial_class = torch.argmax(adv_out_class, 1)
        print(f'attack method {adv}')
        print(f'actual class {true_class}')
        print(f'adversarial class {adversarial_class}')
        # calculate number of correct classification
        true += torch.sum(torch.eq(true_class, adversarial_class))
    # Fix: adv_accuracy was declared with a slot per attack but never
    # populated — store the measured accuracy (the print is kept as before).
    adv_accuracy[adv] = int(true) / total
    print(int(true) / total)
num_workers=1)  # tail of the DataLoader(...) call begun on an earlier line
# adversarial methods
adv_list = ['fgsm', 'r-fgsm', 'cw']
# Generate adversarial examples for each batch and attack method, collecting
# the clean inputs, labels, and adversarial outputs as numpy arrays to save.
xs = list()
ys = list()
advs = list()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    # NOTE(review): `batch` is initialised before this excerpt — verify.
    batch += 1
    print(batch)
    # presumably three attack-parameter settings (add_adv's 5th argument) —
    # TODO confirm against add_adv's definition
    for i in range(3):
        for adv in adv_list:
            output, adv_out = add_adv(classifier, image, label, adv, i)
            output = classifier(output)
            adv_class = classifier(adv_out)
            print('attack method {}'.format(adv))
            print('actual class ', torch.argmax(output, 1))
            print('adversarial class ', torch.argmax(adv_class, 1))
            print('====================================')
            # Move to CPU / detach before converting to numpy for saving.
            xs.append(image.cpu().detach().numpy())
            ys.append(label.cpu().detach().numpy())
            advs.append(adv_out.cpu().detach().numpy())
# Stack per-batch arrays into single arrays and write them to disk.
# NOTE(review): `yt` is built but not saved within this excerpt — the labels
# file is presumably written just after; confirm.
adv_x = np.concatenate(advs, axis=0)
xt = np.concatenate(xs, axis=0)
yt = np.concatenate(ys, axis=0)
np.save('../data/' + 'advs_mnist.npy', adv_x)
np.save('../data/' + 'xs_mnist.npy', xt)