def main():
    #
    # Disable requests insecure warnings
    #
    disable_insecure_warnings()

    #
    # Parse arguments
    #
    parser = build_cli_parser("New Binaries with Netconns")
    parser.add_argument("-d", "--date-to-query", action="store", dest="date",
                        help="New since DATE, format YYYY-MM-DD")
    parser.add_argument("-f", "--output-file", action="store", dest="output_file",
                        help="output file in csv format")
    opts = parser.parse_args()
    if not opts.date:
        parser.print_usage()
        sys.exit(-1)

    #
    # Initialize the cbapi-ng
    # TODO get_cb_object
    #
    cb = get_cb_response_object(opts)

    #
    # Main query
    #
    start_date = "[" + opts.date + "T00:00:00 TO *]"
    binary_query = cb.select(Binary).where(("host_count:[1 TO 3]"
                                            " server_added_timestamp:" + start_date +
                                            " -observed_filename:*.dll"
                                            " -digsig_publisher:Microsoft*"
                                            " -alliance_score_srstrust:*"))

    #
    # Set up the csv writer (text mode with newline='' so csv handles
    # line endings correctly on Python 3)
    #
    if not opts.output_file:
        output_file = open("new_binaries_with_netconns.csv", 'w', newline='')
    else:
        output_file = open(opts.output_file, 'w', newline='')
    csv_writer = csv.writer(output_file)

    #
    # Write out the CSV header
    #
    csv_writer.writerow(("FileName", "Hostname", "Username", "Network Connections",
                         "Process Link", "Binary Link", "Binary MD5",
                         "Signature Status", "Company", "Observed Date",
                         "Host Count", "Binary TimeStamp"))

    #
    # Create the progress bar
    #
    pbar = ProgressBar(widgets=[Percentage(), Bar()],
                       maxval=len(binary_query)).start()

    for i, binary in enumerate(binary_query):
        #
        # Update progress bar
        #
        pbar.update(i + 1)

        #
        # Retrieve the binary timestamp from the PE header
        #
        binary_timestamp = time.asctime(
            time.gmtime(pefile.PE(data=binary.file.read()).FILE_HEADER.TimeDateStamp))

        #
        # Build a sub-query to see if this binary was executed and had netconns
        #
        sub_query = "process_md5:" + binary.md5 + " netconn_count:[1 TO *]"
        process_query = cb.select(Process).where(sub_query)

        #
        # Iterate through the results and write each row out
        #
        for process in process_query:
            try:
                csv_writer.writerow((process.path,
                                     process.hostname,
                                     process.username,
                                     process.netconn_count,
                                     process.webui_link,
                                     binary.webui_link,
                                     binary.md5,
                                     binary.signing_data.result if binary.signing_data.result else "UNSIGNED",
                                     binary.company_name,
                                     binary.server_added_timestamp,
                                     binary.host_count,
                                     binary_timestamp))
            except Exception:
                print(binary)

    pbar.finish()
def perform_evaluation(trees, features, samples, n_iterations, n_classes,
                       testing_algorithms):
    if n_classes > 2 and ("opencv_plain" in testing_algorithms or
                          "opencv_adaboost" in testing_algorithms):
        raise Exception("OpenCV does not support more than two classes")

    # The fertilized variants differ only in the boosting strategy and in
    # whether a leaf manager is used, so they share one code path below.
    fertilized_variants = {
        "fertilized_plain": ("PLAIN", False),
        "fertilized_samme": ("SAMME", False),
        "fertilized_samme_r": ("SAMME.R", False),
        "fertilized_adaboost": ("ADABOOST", False),
        "fertilized_samme_r_leafman": ("SAMME.R", True),
    }

    permutations = list(product(*[trees, features, samples]))
    for (n_trees, n_features, n_samples) in permutations:
        steps = len(testing_algorithms) + 1
        times = [[] for _ in testing_algorithms]
        scores = [[] for _ in testing_algorithms]
        f1_average = "macro" if n_classes > 2 else "binary"

        pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                           maxval=steps * n_iterations).start()
        print("Current parameters: n_trees=" + str(n_trees) +
              " n_samples=" + str(n_samples) +
              " n_features=" + str(n_features))

        for index in range(n_iterations):
            current_step = 0

            # Generate fresh train and test samples for this iteration
            trainX, trainY = create_samples(n_samples, n_features, n_classes)
            testX, testY = create_samples(n_samples, n_features, n_classes)
            pbar.update(index * steps + current_step)
            current_step += 1

            for algorithm in testing_algorithms:
                if algorithm in fertilized_variants:
                    boosting, leafman = fertilized_variants[algorithm]
                    fertilizedForest = setup_fertilized(
                        n_trees, n_features, n_classes, 1, 2, boosting, leafman)
                    fertilizedTrainY = np.ascontiguousarray(
                        np.atleast_2d(trainY).T)
                    start = clock()
                    fertilizedForest.fit(trainX, fertilizedTrainY)
                    end = clock()
                    times[current_step - 1].append(end - start)
                    prediction = fertilizedForest.predict(testX)
                    scores[current_step - 1].append(
                        f1_score(testY, [np.argmax(x) for x in prediction],
                                 average=f1_average))
                elif algorithm == "sklearn_plain":
                    sklearnForest = setup_sklearn_randomForest(
                        n_trees, n_features, 1, 2)
                    start = clock()
                    sklearnForest.fit(trainX, trainY)
                    end = clock()
                    times[current_step - 1].append(end - start)
                    prediction = sklearnForest.predict(testX)
                    scores[current_step - 1].append(
                        f1_score(testY, prediction, average=f1_average))
                elif algorithm in ("sklearn_samme", "sklearn_samme_r"):
                    variant = "SAMME" if algorithm == "sklearn_samme" else "SAMME.R"
                    sklearnBoost = setup_sklearn_adaBoost(
                        n_trees, n_features, 1, 2, variant)
                    start = clock()
                    sklearnBoost.fit(trainX, trainY)
                    end = clock()
                    times[current_step - 1].append(end - start)
                    prediction = sklearnBoost.predict(testX)
                    scores[current_step - 1].append(
                        f1_score(testY, prediction, average=f1_average))
                elif algorithm == "opencv_plain":
                    p = setup_opencv_rtrees(n_trees, n_features, 1, 2)
                    opencvForest = RTrees()
                    start = clock()
                    opencvForest.train(trainX, CV_ROW_SAMPLE,
                                       trainY.astype("int32"), params=p)
                    end = clock()
                    times[current_step - 1].append(end - start)
                    prediction = [int(x)
                                  for x in [opencvForest.predict(x) for x in testX]]
                    scores[current_step - 1].append(
                        f1_score(testY, prediction, average=f1_average))
                elif algorithm == "opencv_adaboost":
                    p = setup_opencv_boost(n_trees, 1)
                    opencvBoost = Boost()
                    start = clock()
                    opencvBoost.train(trainX, CV_ROW_SAMPLE,
                                      trainY.astype("int32"), params=p)
                    end = clock()
                    times[current_step - 1].append(end - start)
                    prediction = [int(x)
                                  for x in [opencvBoost.predict(x) for x in testX]]
                    scores[current_step - 1].append(
                        f1_score(testY, prediction, average=f1_average))

                pbar.update(index * steps + current_step)
                current_step += 1
        pbar.finish()

        output = []
        for idx, alg in enumerate(testing_algorithms):
            output.append([alg, str(np.mean(scores[idx])), str(np.mean(times[idx]))])
        table = tabulate(output,
                         headers=["Algorithm", "F1 Score", "Fit Time in s"])
        print(table)
def main():
    global args
    args = parser.parse_args()

    cuda = args.cuda == 'true'

    task_name = args.task_name
    epoch_size = args.epoch_size
    batch_size = args.batch_size

    result_path = os.path.join(args.result_path, args.task_name, args.model_arch)
    model_path = os.path.join(args.model_path, args.task_name, args.model_arch)

    # 84*11 x 3 x 64 x 64, 85*11 x 3 x 64 x 64, 14*11 x 3 x 64 x 64, 14*11 x 3 x 64 x 64
    data, myidx, test_style_A, test_style_B = get_data()
    test = test_style_A + test_style_B

    if not args.task_name.startswith('car') and not args.task_name.endswith('car'):
        test_A = read_images(filenames=test, domain='A', image_size=args.image_size)
        test_B = read_images(filenames=test, domain='B', image_size=args.image_size)
        test_A = Variable(torch.FloatTensor(test_A), volatile=True)
        test_B = Variable(torch.FloatTensor(test_B), volatile=True)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    generator_A = Generator(extra_layers=True)
    generator_B = Generator(extra_layers=True)
    discriminator_ali = ad_Discriminator()
    discriminator_ReconA = ad_Discriminator_fm1()
    discriminator_ReconB = ad_Discriminator_fm1()

    if cuda:
        test_A = test_A.cuda()
        test_B = test_B.cuda()
        generator_A = generator_A.cuda()
        generator_B = generator_B.cuda()
        discriminator_ali = discriminator_ali.cuda()
        discriminator_ReconA = discriminator_ReconA.cuda()
        discriminator_ReconB = discriminator_ReconB.cuda()

    data_size = len(data)
    n_batches = data_size // batch_size

    recon_criterion = nn.MSELoss()
    gan_criterion = nn.BCELoss()
    #feat_criterion = nn.HingeEmbeddingLoss()
    feat_criterion = nn.MSELoss()
    spv_criterion = nn.MSELoss()

    gen_params = chain(generator_A.parameters(), generator_B.parameters())
    dis_params = chain(discriminator_ali.parameters(),
                       discriminator_ReconA.parameters(),
                       discriminator_ReconB.parameters())

    optim_gen = optim.Adam(gen_params, lr=args.learning_rate,
                           betas=(0.5, 0.999), weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params, lr=args.learning_rate,
                           betas=(0.5, 0.999), weight_decay=0.00001)

    iters = 0
    gen_loss_total = []
    dis_loss_total = []

    for epoch in range(epoch_size):
        _idx_A = list(range(len(data)))
        np.random.shuffle(_idx_A)
        _idx_B = list(range(len(data)))
        np.random.shuffle(_idx_B)
        data_A = np.array(data)[np.array(_idx_A)]
        data_B = np.array(data)[np.array(_idx_B)]

        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        for i in range(n_batches):
            pbar.update(i)

            generator_A.zero_grad()
            generator_B.zero_grad()
            discriminator_ali.zero_grad()
            discriminator_ReconA.zero_grad()
            discriminator_ReconB.zero_grad()

            ############################################################### un_spv
            _path_A = data_A[i * batch_size:(i + 1) * batch_size]
            _path_B = data_B[i * batch_size:(i + 1) * batch_size]

            A = read_images(filenames=_path_A, domain='A', image_size=args.image_size)
            B = read_images(filenames=_path_B, domain='B', image_size=args.image_size)
            A = Variable(torch.FloatTensor(A))
            B = Variable(torch.FloatTensor(B))
            if cuda:
                A = A.cuda()
                B = B.cuda()

            AB = generator_B(A)
            BA = generator_A(B)

            # Use a discriminator to replace the reconstruction loss
            ABA = generator_A(AB)
            BAB = generator_B(BA)

            A_t = torch.cat([A, A], 1).cuda()    # 64 x 9 x 64 x 64
            A_f = torch.cat([A, ABA], 1).cuda()  # 64 x 9 x 64 x 64
            ReconA_dis_real, ReconA_feats_real = discriminator_ReconA(A_t)
            ReconA_dis_fake, ReconA_feats_fake = discriminator_ReconA(A_f)
            dis_loss_ReconA, gen_loss_ReconA = get_gan_loss(
                ReconA_dis_real, ReconA_dis_fake, gan_criterion, cuda)
            fm_loss_ReconA = get_fm_loss(ReconA_feats_real, ReconA_feats_fake,
                                         feat_criterion)

            B_t = torch.cat([B, B], 1).cuda()    # 64 x 9 x 64 x 64
            B_f = torch.cat([B, BAB], 1).cuda()  # 64 x 9 x 64 x 64
            ReconB_dis_real, ReconB_feats_real = discriminator_ReconB(B_t)
            ReconB_dis_fake, ReconB_feats_fake = discriminator_ReconB(B_f)
            dis_loss_ReconB, gen_loss_ReconB = get_gan_loss(
                ReconB_dis_real, ReconB_dis_fake, gan_criterion, cuda)
            fm_loss_ReconB = get_fm_loss(ReconB_feats_real, ReconB_feats_fake,
                                         feat_criterion)

            # Real/Fake GAN loss (A)
            tuple_1 = torch.cat([A, AB], 1).cuda()  # 64 x 6 x 64 x 64
            tuple_2 = torch.cat([BA, B], 1).cuda()  # 64 x 6 x 64 x 64
            dis_real, feats_real = discriminator_ali(tuple_1)
            dis_fake, feats_fake = discriminator_ali(tuple_2)
            dis_loss, gen_loss = get_ali_loss(dis_real, dis_fake,
                                              gan_criterion, cuda)
            fm_loss = get_fm_loss(feats_real, feats_fake, feat_criterion)

            if iters < args.gan_curriculum:
                rate = args.starting_rate
            else:
                rate = args.default_rate

            gen_loss_A_total = ((fm_loss * 0.9 + gen_loss * 0.1) * (1. - rate)) / 2.0
            gen_loss_B_total = ((fm_loss * 0.9 + gen_loss * 0.1) * (1. - rate)) / 2.0
            gen_loss_ReconA_total = (fm_loss_ReconB * 0.9 +
                                     gen_loss_ReconB * 0.1) * (1. - rate) / 2.0
            gen_loss_ReconB_total = (fm_loss_ReconA * 0.9 +
                                     gen_loss_ReconA * 0.1) * (1. - rate) / 2.0
            #############################################################################

            if args.model_arch == 'adrec_edges2shoes_ali_nospv_fm':
                gen_loss = (gen_loss_A_total + gen_loss_B_total +
                            gen_loss_ReconA_total + gen_loss_ReconB_total)
                dis_loss = dis_loss + dis_loss_ReconA + dis_loss_ReconB
            elif args.model_arch == 'gan':
                gen_loss = gen_loss_B
                dis_loss = dis_loss_B

            if iters % args.update_interval == 0:
                dis_loss.backward()
                optim_dis.step()
            else:
                gen_loss.backward()
                optim_gen.step()

            if iters % args.log_interval == 0:
                print("---------------------")
                print("GEN Loss:", as_np(gen_loss.mean()),
                      as_np(gen_loss_ReconA.mean()),
                      as_np(gen_loss_ReconB.mean()))
                print("DIS Loss:", as_np(dis_loss.mean()),
                      as_np(dis_loss_ReconA.mean()),
                      as_np(dis_loss_ReconB.mean()))

            if iters % args.image_save_interval == 0:
                AB = generator_B(test_A)
                BA = generator_A(test_B)
                ABA = generator_A(AB)
                BAB = generator_B(BA)

                n_testset = min(test_A.size()[0], test_B.size()[0])
                subdir_path = os.path.join(
                    result_path, str(iters / args.image_save_interval))
                if not os.path.exists(subdir_path):
                    os.makedirs(subdir_path)

                for im_idx in range(n_testset):
                    A_val = test_A[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.
                    B_val = test_B[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.
                    BA_val = BA[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.
                    ABA_val = ABA[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.
                    AB_val = AB[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.
                    BAB_val = BAB[im_idx].cpu().data.numpy().transpose(1, 2, 0) * 255.

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '.A.jpg',
                                      A_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.B.jpg',
                                      B_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BA.jpg',
                                      BA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.AB.jpg',
                                      AB_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.ABA.jpg',
                                      ABA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BAB.jpg',
                                      BAB_val.astype(np.uint8)[:, :, ::-1])

            if iters % args.model_save_interval == 0:
                torch.save(generator_A,
                           os.path.join(model_path, 'model_gen_A-' +
                                        str(iters / args.model_save_interval)))
                torch.save(generator_B,
                           os.path.join(model_path, 'model_gen_B-' +
                                        str(iters / args.model_save_interval)))
                torch.save(discriminator_ali,
                           os.path.join(model_path, 'model_dis_ali-' +
                                        str(iters / args.model_save_interval)))
                #torch.save(discriminator_spv_A, os.path.join(model_path, 'model_dis_spv_A-' + str(iters / args.model_save_interval)))
                #torch.save(discriminator_spv_B, os.path.join(model_path, 'model_dis_spv_B-' + str(iters / args.model_save_interval)))

            iters += 1
def experiment_one():
    model_dir = './models/mnf_lenet_mnist_fq2_fr2_usezTrue_thres0.5/model/'
    # pyx = tf.get_variable("pyx")
    # with tf.Session() as sess:
    # sess = tf.InteractiveSession()
    sess = tf.Session()

    mnist = MNIST()
    (xtrain, ytrain), (xvalid, yvalid), (xtest, ytest) = mnist.images()
    xtrain, xvalid, xtest = (np.transpose(xtrain, [0, 2, 3, 1]),
                             np.transpose(xvalid, [0, 2, 3, 1]),
                             np.transpose(xtest, [0, 2, 3, 1]))
    ytrain, yvalid, ytest = (to_categorical(ytrain, 10),
                             to_categorical(yvalid, 10),
                             to_categorical(ytest, 10))

    N, height, width, n_channels = xtrain.shape
    iter_per_epoch = N / 100

    input_shape = [None, height, width, n_channels]
    x = tf.placeholder(tf.float32, input_shape, name='x')
    y_ = tf.placeholder(tf.float32, [None, 10], name='y_')

    model = MNFLeNet(N, input_shape=input_shape, flows_q=2, flows_r=2,
                     use_z=False, learn_p=True, thres_var=0.5, flow_dim_h=50)

    tf.set_random_seed(1)
    np.random.seed(1)

    y = model.predict(x)
    yd = model.predict(x, sample=False)
    pyx = tf.nn.softmax(y)

    saver = tf.train.import_meta_graph(model_dir + 'mnf.meta')
    saver.restore(sess, tf.train.latest_checkpoint(model_dir))
    # saver = tf.train.latest_checkpoint(model_dir + '**mnf**')
    # saver.restore(sess, model_dir + 'mnf.json')
    # saver.restore(sess, model_dir + 'mnf')

    all_vars = tf.get_collection('vars')
    for v in all_vars:
        v_ = sess.run(v)
    print("loaded")

    print '------------------------------------------------'
    print '- MNIST rotated -'
    data_path = '../../data/mnist/mnist_rotated.pkl'
    if os.path.exists(data_path):
        with open(data_path, 'rb') as f:
            data = pickle.load(f)
        X = data['X']
        y = data['y']
    else:
        # X, y = test_mnist_rot(plot=False)
        # save_mnist_to_file(X, y)
        pass

    X = X.reshape((X.shape[0], 1, 28, 28))
    print X.shape
    X = np.transpose(X, [0, 2, 3, 1])
    # X = X[:, np.newaxis, :, :]
    y = to_categorical(y, 10)
    print 'Data loaded'

    preds = np.zeros_like(y)
    widgets = ["Sampling |", Percentage(), Bar(), ETA()]
    pbar = ProgressBar(10, widgets=widgets)
    pbar.start()
    for i in xrange(10):
        pbar.update(i)
        for j in xrange(1):
            pyxi = sess.run(pyx, feed_dict={x: X[0:10]})
            preds[0:10] += pyxi / 10
    print

    sample_accuracy = np.mean(np.equal(np.argmax(preds, 1), np.argmax(y, 1)))
    print 'Sample test accuracy: {}'.format(sample_accuracy)
    print '------------------------------------------------'
def get_progressbar(maxval=10000):
    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=maxval).start()
    return pbar
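# A minimal usage sketch for get_progressbar (not from the original source):
# the helper returns an already-started bar, so a caller only needs update()
# and finish(). The workload below is purely illustrative.
def _demo_get_progressbar(total=10000):
    pbar = get_progressbar(maxval=total)
    for i in range(total):
        # ... one unit of work would go here ...
        pbar.update(i + 1)
    pbar.finish()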
def train(self):
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:%d" % cfg.GPU_ID):
            counter = self.build_model(sess)
            saver = tf.train.Saver(tf.all_variables(),
                                   keep_checkpoint_every_n_hours=2)
            # summary_op = tf.merge_all_summaries()
            summary_writer = tf.train.SummaryWriter(self.log_dir, sess.graph)

            keys = ["d_loss", "g_loss"]
            log_vars = []
            log_keys = []
            for k, v in self.log_vars:
                if k in keys:
                    log_vars.append(v)
                    log_keys.append(k)
                    # print(k, v)

            generator_lr = cfg.TRAIN.GENERATOR_LR
            discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
            num_embedding = cfg.TRAIN.NUM_EMBEDDING
            lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH

            number_example = self.dataset.train._num_examples
            updates_per_epoch = int(number_example / self.batch_size)
            epoch_start = int(counter / updates_per_epoch)
            for epoch in range(epoch_start, self.max_epoch):
                widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
                pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets)
                pbar.start()

                if epoch % lr_decay_step == 0 and epoch != 0:
                    generator_lr *= 0.5
                    discriminator_lr *= 0.5

                all_log_vals = []
                for i in range(updates_per_epoch):
                    pbar.update(i)
                    # training d
                    images, wrong_images, embeddings, _, _ =\
                        self.dataset.train.next_batch(self.batch_size,
                                                      num_embedding)
                    feed_dict = {self.images: images,
                                 self.wrong_images: wrong_images,
                                 self.embeddings: embeddings,
                                 self.generator_lr: generator_lr,
                                 self.discriminator_lr: discriminator_lr}
                    # train d
                    feed_out = [self.discriminator_trainer,
                                self.d_sum,
                                self.hist_sum,
                                log_vars]
                    _, d_sum, hist_sum, log_vals = sess.run(feed_out, feed_dict)
                    summary_writer.add_summary(d_sum, counter)
                    summary_writer.add_summary(hist_sum, counter)
                    all_log_vals.append(log_vals)
                    # train g
                    feed_out = [self.generator_trainer, self.g_sum]
                    _, g_sum = sess.run(feed_out, feed_dict)
                    summary_writer.add_summary(g_sum, counter)
                    # save checkpoint
                    counter += 1
                    if counter % self.snapshot_interval == 0:
                        snapshot_path = "%s/%s_%s.ckpt" %\
                            (self.checkpoint_dir, self.exp_name, str(counter))
                        fn = saver.save(sess, snapshot_path)
                        print("Model saved in file: %s" % fn)

                img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY, epoch)
                summary_writer.add_summary(img_sum, counter)

                avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                dic_logs = {}
                for k, v in zip(log_keys, avg_log_vals):
                    dic_logs[k] = v
                    # print(k, v)

                log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k]))
                                     for k in dic_logs)
                print("Epoch %d | " % (epoch) + log_line)
                sys.stdout.flush()
                if np.any(np.isnan(avg_log_vals)):
                    raise ValueError("NaN detected!")
updates_per_epoch = number_examples // cfg.TRAIN.BATCH_SIZE
number_examples_val = val_dataset._num_examples
updates_per_epoch_val = number_examples_val // cfg.TRAIN.BATCH_SIZE

# Create one-hot answers dictionary to be used in prepare answers
with open('./data/clver_rn/answer_to_ix.json', 'r') as answer_file:
    answer_to_ix = json.load(answer_file)
answer_to_one_hot = {}
one_hot_init_vector = [0] * len(answer_to_ix)

# Set up the training loop
for epoch in range(1, args.epochs + 1):
    epoch_start_time = time.time()
    widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets)
    pbar.start()

    # Call the train and the test step for the dataset
    epoch_loss, epoch_accuracy = train(updates_per_epoch)
    log_line_train = '%s: %0.4f; %s: %0.4f; ' % (
        "Training Loss", epoch_loss, "Training Accuracy", epoch_accuracy)
    val_accuracy = val(updates_per_epoch_val)
    log_line_val = '%s: %0.4f ' % ("Validation Accuracy", val_accuracy)

    epoch_end_time = time.time()
    time_taken = epoch_end_time - epoch_start_time
    log_time_line = '%s: %0.4f' % ("Time taken for the current epoch",
                                   time_taken)
    sys.stdout.flush()
    print("Epoch %d | " % (epoch) + log_line_train + log_line_val +
          log_time_line)
filenames = ["Stanford_Online_Products.zip"] urls = [base_url + f for f in filenames] fuel_data_path = os.path.join(fuel_root_path, "online_products") os.mkdir(fuel_data_path) for filename in filenames: url = base_url + filename filepath = os.path.join(fuel_data_path, filename) with contextlib.closing(request.urlopen(url)) as f: expected_filesize = int(f.headers["content-length"]) print(expected_filesize) time.sleep(5) widgets = ['{}: '.format(filename), Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()] progress_bar = ProgressBar(widgets=widgets, maxval=expected_filesize).start() def reporthook(count, blockSize, totalSize): progress_bar.update(min(count*blockSize, totalSize)) request.urlretrieve(url, filepath, reporthook=reporthook) progress_bar.finish() downloaded_filesize = os.path.getsize(filepath) assert expected_filesize == downloaded_filesize, " ".join(( "expected file size is {}, but the actual size of the downloaded file", "is {}.")).format(expected_filesize, downloaded_filesize)
def generate_template_dtree(compute_function, classname, pool, outfile, doc,
                            login, align='left', MathJax=False):
    # TODO: Support for right align in 'parent' functions
    from parampool.generator.flask.latex_symbols import \
        get_symbol, symbols_same_size
    import inspect
    args = inspect.getargspec(compute_function).args
    app_dir = outfile.split("templates")[0]
    static_dir = os.path.join(app_dir, "static")
    compute_function_name = compute_function.__name__

    pre_code = """\
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Django %(compute_function_name)s app</title>
    <link rel="StyleSheet" href="static/dtree.css" type="text/css" />
    <script type="text/javascript" src="static/dtree.js"></script>
  </head>
  <body>
""" % vars()

    if MathJax:
        pre_code += '''
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
  TeX: {
    equationNumbers: { autoNumber: "AMS" },
    extensions: ["AMSmath.js", "AMSsymbols.js", "autobold.js"]
  }
});
</script>
<script type="text/javascript"
 src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>
<!-- Fix slow MathJax rendering in IE8 -->
<meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7">
'''

    if login:
        pre_code += '''\
{% if user.is_anonymous %}
<p align="right"><a href="/login">Login</a> / <a href="/reg">Register</a></p>
{% else %}
<p align="right">Logged in as {{user}}<br><a href="/old">Previous simulations</a><br><a href="/logout">Logout</a></p>
{% endif %}
'''

    pre_code += """\
%(doc)s

<!-- Input and Results are typeset as a two-column table -->
<table>
<tr>
<td valign="top">
<h2>Input:</h2>
<div class="dtree">
<p><a href="javascript: d.openAll();">open all</a> | <a href="javascript: d.closeAll();">close all</a></p>
<form method=post action="" enctype="multipart/form-data">{%% csrf_token %%}
<script type="text/javascript">
d = new dTree('d');
""" % vars()

    post_code = """\
document.write(d);
</script>
</div>
<p><input type="submit" value="Compute"></form></p>
</td>
<td valign="top">
{% if result != None %}
<h2>Result:</h2>
{{ result|safe }}
"""
    if login:
        post_code += '''
{% if not user.is_anonymous %}
<h3>Comments:</h3>
<form method=post action="/add_comment/">{% csrf_token %}
<textarea name="comments" rows="4" cols="40"></textarea>
<p><input type="submit" value="Add">
</form>
{% endif %}
'''
    post_code += '''
{% endif %}
</td>
</tr>
</table>
</body>
</html>'''

    def leaf_func(tree_path, level, item, user_data):
        id = user_data.id
        parent_id = user_data.parent_id[-1]
        name = item.name
        field_name = parampool.utils.legal_variable_name(name)
        form = """\
{%% spaceless %%}
{{ form.%(field_name)s }}
{%% endspaceless %%} \
{%% if form.%(field_name)s.errors %%} \
  {%% for error in form.%(field_name)s.errors %%} \
    <err> {{error}} </err> \
  {%% endfor %%}{%% endif %%}
""" % vars()
        # (Note: need the spaceless trick to ensure that the resulting HTML
        # code is on one line for select widgets in Django (strange behavior))

        if hasattr(user_data, 'pb'):
            user_data.pb.update(user_data.pbid)
            user_data.pbid += 1

        if 'symbol' in item.data:
            symbol = item.data["symbol"]
        else:
            symbol = "\\mbox{%s}" % name
        imgsrc = get_symbol(symbol, static_dir, tree_path)
        imgsrc = os.sep + "static" + imgsrc.split("static")[-1]

        # Use slider and show current value
        if item.data.get("widget", None) in ("range", "integer_range"):
            showvalue = ' <span id="range"></span>'
        else:
            showvalue = ""

        # Make label
        label = []
        if 'help' in item.data:
            label.append(item.data['help'])
        if 'unit' in item.data:
            label.append('Unit: ' + item.data['unit'])
        label = ' '.join(label)

        if align == "right":
            line = '%(form)s<img src="%(imgsrc)s" height="18" />' % vars()
            line += showvalue
            user_data.code += """\
d.add(%(id)i, %(parent_id)i, '%(line)s', '#', '%(label)s');
""" % vars()
        else:
            form += showvalue
            user_data.code += """\
d.add(%(id)i, %(parent_id)i, '%(form)s', '#', '%(label)s', '', '%(imgsrc)s');
""" % vars()
        user_data.id += 1

    def subtree_start_func(tree_path, level, item, user_data):
        id = user_data.id
        parent_id = user_data.parent_id[-1]
        name = item.name
        user_data.code += """\
d.add(%(id)i, %(parent_id)i, '%(name)s');
""" % vars()
        user_data.parent_id.append(user_data.id)
        user_data.id += 1

    def subtree_end_func(tree_path, level, item, user_data):
        del user_data.parent_id[-1]

    class CodeData:
        """Object to hold output code through tree recursion."""
        id = 0
        pbid = 0
        parent_id = [-1]

    codedata = CodeData()
    codedata.code = pre_code

    # Display a progressbar if we have many data items
    pool.update()
    num_widgets = len(args) if pool is None else len(pool.paths2data_items)
    display_progressbar = num_widgets >= 10
    if display_progressbar:
        from progressbar import \
            ProgressBar, Percentage, Bar, ETA, RotatingMarker
        widgets = ['Generating: ', Percentage(), ' ',
                   Bar(marker=RotatingMarker()), ' ', ETA()]
        pb = ProgressBar(widgets=widgets, maxval=num_widgets - 1).start()
        codedata.pb = pb

    pool.traverse(callback_leaf=leaf_func,
                  callback_subtree_start=subtree_start_func,
                  callback_subtree_end=subtree_end_func,
                  user_data=codedata,
                  verbose=False)
    if display_progressbar:
        pb.finish()

    code = codedata.code + post_code
    symbols_same_size(static_dir)

    if outfile is None:
        return code
    else:
        f = open(outfile, 'w')
        f.write(code)
        f.close()
def __init__(self, maxval=0):
    widgets = [Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA()]
    super(ProgressBarContext, self).__init__(widgets=widgets,
                                             maxval=maxval,
                                             fd=sys.stdout)
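# Hedged usage sketch (not from the original source): this assumes the rest of
# ProgressBarContext implements the context-manager protocol
# (__enter__/__exit__), which the snippet above does not show.
def _demo_progressbar_context(items):
    with ProgressBarContext(maxval=len(items)) as pb:
        for i, _ in enumerate(items):
            pb.update(i + 1)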
pbar = ProgressBar(maxval=len(masterscrape), term_width=100).start()
while not result.ready():
    pbar.update(len(masterscrape) - result._number_left)
    time.sleep(1)
pbar.finish()

full_masterscrape = filter(None, result.get())
pool2.close()
pool2.join()

got = []
gc.disable()
print("Validating and deduping links")
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                   maxval=100, term_width=100).start()
# this loop is slow and needs to be faster. wtf.
for idx, n in enumerate(full_masterscrape):
    linkhost = n[0]
    appendable = [[linkhost, actual_link] for actual_link in n[1]
                  if actual_link not in dontvisit]
    masterappend = [(linkhost, actual_link) for actual_link in n[1]]
    dontvisitappend = [actual_link for actual_link in n[1]
                       if actual_link not in dontvisit]
args = parser.parse_args()
input_files = args.infiles

if not (args.parity == 'even' or args.parity == 'odd'):
    raise ValueError("parity argument must be 'even' or 'odd', not %s" %
                     args.parity)

logging.info("loading trees...")
in_tree = ROOT.TChain(args.treepath)
for i in input_files:
    in_tree.Add(i)

new_file = ROOT.TFile(args.outputfile, "recreate")
new_tree = in_tree.CloneTree(0)

entries = in_tree.GetEntries()
progress = ProgressBar(widgets=[ETA(), Bar('>')], maxval=entries).start()
for i, row in enumerate(in_tree):
    progress.update(i + 1)
    if args.parity == 'even' and (i % 2) == 0:
        in_tree.GetEntry(i)
        new_tree.Fill()
    elif args.parity == 'odd' and (i % 2) == 1:
        in_tree.GetEntry(i)
        new_tree.Fill()

new_tree.AutoSave()
del in_tree
del new_file
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

# other constants
OUI_LIST_URL = 'http://standards.ieee.org/develop/regauth/oui/oui.txt'
OUI_LIST_FILE = 'oui.txt'
OUI_RE = re_compile(r'^\s+([0-9A-F]{6})\s+\(base 16\)\s+(.*)\s*$')
IP4P_LIST_URL = \
    'http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xml'
IP4P_LIST_FILE = 'protocol-numbers.xml'
HELPER_DATA = join(realpath(dirname(__file__)), 'scraper.dat')
UA = 'tollgate/%s (scraper.py; Python)' % __version__
PBAR_WIDGET_STYLE = [Percentage(), Bar(), ETA()]


def download_file(filename, url):
    etag_filename = filename + '.etag'

    # check to see if there's an existing dump of the data
    mtime = None
    if exists(filename):
        if not isfile(filename):
            raise Exception(
                ('ERROR: %s exists but is not a file. Please check this, and ' +
                 'move it out of the way so I can run.') % filename)

        # lets also check for an etag, and use it if it's there.
        etag = None
        if exists(etag_filename):
origin = Vec3(0, 0, 0)

world = HitableList()
world.append(Sphere(Vec3(0, 0, -1), 0.5, Lambertian(Vec3(0.8, 0.3, 0.3))))
world.append(Sphere(Vec3(0, -100.5, -1), 100, Lambertian(Vec3(0.8, 0.8, 0.0))))
world.append(Sphere(Vec3(1, 0, -1), 0.5, Metal(Vec3(0.8, 0.6, 0.2), fuzz=0.3)))
world.append(Sphere(Vec3(-1, 0, -1), 0.5, Metal(Vec3(0.8, 0.8, 0.8), fuzz=1.0)))

cam = Camera()

pbar = ProgressBar(
    widgets=['Percentage ', Percentage(), ' ', ETA(), ' ', Bar()],
    maxval=nx * ny).start()

with open('image.ppm', 'w') as f:
    f.write('P3\n{} {}\n255\n'.format(nx, ny))
    for y, j in enumerate(xrange(ny - 1, -1, -1)):
        for i in xrange(nx):
            col = Vec3(0, 0, 0)
            for _ in xrange(ns):
                u = float(i + random()) / nx
                v = float(j + random()) / ny
                r = cam.get_ray(u, v)
                p = r.point_at_parameter(2.0)
                col += color(r, world, depth=0)
            col /= float(ns)
def train(self):
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:%d" % cfg.GPU_ID):
            counter = self.build_model(sess)
            saver = tf.train.Saver(tf.all_variables(),
                                   keep_checkpoint_every_n_hours=5)
            summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph)

            if cfg.TRAIN.FINETUNE_LR:
                keys = ["hr_d_loss", "hr_g_loss", "d_loss", "g_loss"]
            else:
                keys = ["d_loss", "g_loss"]
            log_vars = []
            log_keys = []
            for k, v in self.log_vars:
                if k in keys:
                    log_vars.append(v)
                    log_keys.append(k)

            generator_lr = cfg.TRAIN.GENERATOR_LR
            discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
            lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
            number_example = self.dataset.train._num_examples
            updates_per_epoch = int(number_example / self.batch_size)
            # int((counter + lr_decay_step/2) / lr_decay_step)
            decay_start = cfg.TRAIN.PRETRAINED_EPOCH
            epoch_start = int(counter / updates_per_epoch)
            for epoch in range(epoch_start, self.max_epoch):
                widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
                pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets)
                pbar.start()

                if epoch % lr_decay_step == 0 and epoch > decay_start:
                    generator_lr *= 0.5
                    discriminator_lr *= 0.5

                all_log_vals = []
                for i in range(updates_per_epoch):
                    pbar.update(i)
                    log_vals = self.train_one_step(generator_lr,
                                                   discriminator_lr,
                                                   counter, summary_writer,
                                                   log_vars, sess)
                    all_log_vals.append(log_vals)
                    # save checkpoint
                    counter += 1
                    if counter % self.snapshot_interval == 0:
                        snapshot_path = "%s/%s_%s.ckpt" %\
                            (self.checkpoint_dir, self.exp_name, str(counter))
                        fn = saver.save(sess, snapshot_path)
                        print("Model saved in file: %s" % fn)

                img_summary, img_summary2 = self.epoch_sum_images(
                    sess, cfg.TRAIN.NUM_COPY)
                summary_writer.add_summary(img_summary, counter)
                summary_writer.add_summary(img_summary2, counter)

                avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                dic_logs = {}
                for k, v in zip(log_keys, avg_log_vals):
                    dic_logs[k] = v

                log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k]))
                                     for k in dic_logs)
                print("Epoch %d | " % (epoch) + log_line)
                sys.stdout.flush()
                if np.any(np.isnan(avg_log_vals)):
                    raise ValueError("NaN detected!")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path', default="./CCTV_News.mp4", nargs='?',
                        help="Path to the video or audio file to subtitle")
    parser.add_argument('-C', '--concurrency', type=int, default=10,
                        help="Number of concurrent API requests to make")
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles "
                             "are saved in the same directory and name as the "
                             "source path)")
    parser.add_argument('-F', '--format', default="srt",
                        help="Destination subtitle format")
    parser.add_argument('-S', '--src-language', default="zh-CN",
                        help="Language spoken in source file")
    parser.add_argument('-D', '--dst-language', default="zh-CN",
                        help="Desired language for the subtitles")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats', action='store_true',
                        help="List all available subtitle formats")
    parser.add_argument('--list-languages', action='store_true',
                        help="List all available source/destination languages")

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(args.concurrency)
    converter = WAVConverter(source_path=audio_filename, slicenum=len(regions))

    if regions:
        try:
            widgets = ["Converting speech regions to WAV files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()
            os.remove(audio_filename)
            wavlist = create_manifest(os.getcwd() + '/temp',
                                      os.getcwd() + '/temp' + '/wavlist.txt')
            transcripts = infer.infer_interface(wavlist, len(extracted_regions))
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = args.output
    if not dest:
        base, ext = os.path.splitext(args.source_path)
        dest = "{base}.{format}".format(base=base, format=args.format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    print("Subtitles file created at {}".format(dest))
    shutil.rmtree('temp')
    return 0
def _progress(iterable):
    if ProgressBar:
        pbar = ProgressBar(widgets=[SimpleProgress(), Bar(), ETA()])
    else:
        pbar = iter
    return pbar(iterable)
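# Hedged usage sketch (not from the original source): _progress assumes
# ProgressBar is falsy (e.g. None) when the progressbar package could not be
# imported, so it degrades to a plain iterator. A guarded import like the one
# below would produce that behavior.
# try:
#     from progressbar import ProgressBar, SimpleProgress, Bar, ETA
# except ImportError:
#     ProgressBar = None
for _item in _progress(range(1000)):
    pass  # process _item here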
def __init__(self, analysis, sample, **kwargs):
    # default to access via sample/analysis
    self.analysis = analysis
    self.sample = sample
    self.shift = kwargs.pop('shift', '')
    self.skipHists = kwargs.pop('skipHists', False)
    self.isData = isData(self.sample)
    self.intLumi = kwargs.get('intLumi', float(getLumi()))
    logging.debug('Initializing {0} {1} {2}'.format(self.analysis,
                                                    self.sample, self.shift))

    # backup passing custom parameters
    self.ntupleDirectory = kwargs.pop(
        'ntupleDirectory',
        '{0}/{1}'.format(getNtupleDirectory(self.analysis, shift=self.shift),
                         self.sample))
    self.inputFileList = kwargs.pop('inputFileList', '')
    self.outputFile = kwargs.pop(
        'outputFile',
        getNewFlatHistograms(self.analysis, self.sample, shift=self.shift))
    if os.path.dirname(self.outputFile):
        python_mkdir(os.path.dirname(self.outputFile))
    self.treeName = kwargs.pop('treeName', getTreeName(self.analysis))

    if hasProgress:
        self.pbar = kwargs.pop(
            'progressbar',
            ProgressBar(widgets=['{0}: '.format(sample), ' ',
                                 SimpleProgress(), ' ', Percentage(), ' ',
                                 Bar(), ' ', ETA()]))
    else:
        self.pbar = None

    # get stuff needed to flatten
    self.infile = 0
    self.tchain = 0
    self.initialized = False
    self.hists = {}
    self.datasets = {}
def main(argv):
    ##################
    # These change a lot
    numWaveforms = 30
    numThreads = 12

    ndim = 6 * numWaveforms + 8
    nwalkers = 4 * ndim

    iter = 5000
    burnIn = 4000
    wfPlotNumber = 100
    ######################

    # plt.ion()

    fitSamples = 200

    # Prepare detector
    zero_1 = -5.56351644e+07
    pole_1 = -1.38796386e+04
    pole_real = -2.02559385e+07
    pole_imag = 9885315.37450211

    zeros = [zero_1, 0]
    poles = [pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1]
    system = signal.lti(zeros, poles, 1E7)

    tempGuess = 77.89
    gradGuess = 0.0483
    pcRadGuess = 2.591182
    pcLenGuess = 1.613357

    # Create a detector model
    detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05, 2.5, 1.65)
    det = Detector(detName, temperature=tempGuess, timeStep=1.,
                   numSteps=fitSamples * 10, tfSystem=system)
    det.LoadFields("P42574A_fields_v3.npz")
    det.SetFields(pcRadGuess, pcLenGuess, gradGuess)

    tempIdx = -8
    gradIdx = -7
    pcRadIdx = -6
    pcLenIdx = -5
    # and the remaining 4 are for the transfer function

    fig_size = (20, 10)

    # Create a decent start guess by fitting waveform-by-waveform
    wfFileName = "P42574A_512waveforms_%drisetimeculled.npz" % numWaveforms
    if os.path.isfile(wfFileName):
        data = np.load(wfFileName)
        results = data['results']
        wfs = data['wfs']
        numWaveforms = wfs.size
    else:
        print "No saved waveforms available. Loading from Data"
        exit(0)

    # prep holders for each wf-specific param
    r_arr = np.empty(numWaveforms)
    phi_arr = np.empty(numWaveforms)
    z_arr = np.empty(numWaveforms)
    scale_arr = np.empty(numWaveforms)
    t0_arr = np.empty(numWaveforms)
    smooth_arr = np.ones(numWaveforms) * 7.
    simWfArr = np.empty((1, numWaveforms, fitSamples))

    # Prepare the initial value arrays
    for (idx, wf) in enumerate(wfs):
        wf.WindowWaveformTimepoint(fallPercentage=.99)
        r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], \
            smooth_arr[idx] = results[idx]['x']
        t0_arr[idx] += 10  # because i had a different windowing offset back in the day

    # Plot the waveforms to take a look at the initial guesses
    if False:
        fig = plt.figure()
        for (idx, wf) in enumerate(wfs):
            print "WF number %d:" % idx
            print "  >>r: %f\n  >>phi %f\n  >>z %f\n  >>e %f\n  >>t0 %f\n  >>smooth %f" % (
                r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx],
                t0_arr[idx], smooth_arr[idx])
            ml_wf = det.GetSimWaveform(r_arr[idx], phi_arr[idx], z_arr[idx],
                                       scale_arr[idx] * 100, t0_arr[idx],
                                       fitSamples, smoothing=smooth_arr[idx])
            plt.plot(ml_wf, color="b")
            plt.plot(wf.windowedWf, color="r")
        value = raw_input('  --> Press q to quit, any other key to continue\n')
        if value == 'q':
            exit(0)

    # Initialize the multithreading
    p = Pool(numThreads, initializer=initializeDetectorAndWaveforms,
             initargs=[det, wfs])
    initializeDetectorAndWaveforms(det, wfs)

    # Do the MCMC
    mcmc_startguess = np.hstack((
        r_arr[:], phi_arr[:], z_arr[:], scale_arr[:] * 100.,
        t0_arr[:], smooth_arr[:],                       # waveform-specific params
        tempGuess, gradGuess, pcRadGuess, pcLenGuess,
        zero_1, pole_1, pole_real, pole_imag))          # detector-specific

    # number of walkers _must_ be even
    if nwalkers % 2:
        nwalkers += 1

    # Initialize walkers with a random, narrow ball around the start guess
    pos0 = [mcmc_startguess + 1e-2 * np.random.randn(ndim) * mcmc_startguess
            for i in range(nwalkers)]

    # Make sure everything in the initial guess is within bounds
    for pos in pos0:
        pos[:numWaveforms] = np.clip(
            pos[:numWaveforms], 0, np.floor(det.detector_radius * 10.) / 10.)
        pos[numWaveforms:2 * numWaveforms] = np.clip(
            pos[numWaveforms:2 * numWaveforms], 0, np.pi / 4)
        pos[2 * numWaveforms:3 * numWaveforms] = np.clip(
            pos[2 * numWaveforms:3 * numWaveforms], 0,
            np.floor(det.detector_length * 10.) / 10.)
        pos[4 * numWaveforms:5 * numWaveforms] = np.clip(
            pos[4 * numWaveforms:5 * numWaveforms], 0, fitSamples)
        pos[5 * numWaveforms:6 * numWaveforms] = np.clip(
            pos[5 * numWaveforms:6 * numWaveforms], 0, 20.)

        pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
        pos[gradIdx] = np.clip(pos[gradIdx], det.gradList[0], det.gradList[-1])
        pos[pcRadIdx] = np.clip(pos[pcRadIdx], det.pcRadList[0], det.pcRadList[-1])
        pos[pcLenIdx] = np.clip(pos[pcLenIdx], det.pcLenList[0], det.pcLenList[-1])

        prior = lnprior(pos, )
        if not np.isfinite(prior):
            print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
            print pos
            exit(0)

    # Initialize, run the MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=p)

    # w/ progress bar, & time the thing
    bar = ProgressBar(widgets=[Percentage(), Bar()], maxval=iter).start()
    start = timer()
    for (idx, result) in enumerate(
            sampler.sample(pos0, iterations=iter, storechain=True)):
        bar.update(idx + 1)
    end = timer()
    bar.finish()

    print "Elapsed time: " + str(end - start)

    print "Dumping chain to file..."
    np.save("sampler_%dwfs.npy" % numWaveforms, sampler.chain)

    print "Making MCMC steps figure..."

    ######### Plots for Waveform params
    stepsFig = plt.figure(2, figsize=fig_size)
    plt.clf()
    ax0 = stepsFig.add_subplot(611)
    ax1 = stepsFig.add_subplot(612, sharex=ax0)
    ax2 = stepsFig.add_subplot(613, sharex=ax0)
    ax3 = stepsFig.add_subplot(614, sharex=ax0)
    ax4 = stepsFig.add_subplot(615, sharex=ax0)
    ax5 = stepsFig.add_subplot(616, sharex=ax0)

    ax0.set_ylabel('r')
    ax1.set_ylabel('phi')
    ax2.set_ylabel('z')
    ax3.set_ylabel('scale')
    ax4.set_ylabel('t0')
    ax5.set_ylabel('smoothing')

    for i in range(nwalkers):
        for j in range(wfs.size):
            ax0.plot(sampler.chain[i, :, 0 + j], alpha=0.3)                 # r
            ax1.plot(sampler.chain[i, :, numWaveforms + j], alpha=0.3)      # phi
            ax2.plot(sampler.chain[i, :, 2 * numWaveforms + j], alpha=0.3)  # z
            ax3.plot(sampler.chain[i, :, 3 * numWaveforms + j], alpha=0.3)  # energy
            ax4.plot(sampler.chain[i, :, 4 * numWaveforms + j], alpha=0.3)  # t0
            ax5.plot(sampler.chain[i, :, 5 * numWaveforms + j], alpha=0.3)  # smoothing

    plt.savefig("emcee_wfchain_%dwfs.png" % numWaveforms)

    ######### Plots for Detector params
    stepsFigDet = plt.figure(3, figsize=fig_size)
    plt.clf()
    ax0 = stepsFigDet.add_subplot(411)
    ax1 = stepsFigDet.add_subplot(412, sharex=ax0)
    ax2 = stepsFigDet.add_subplot(413, sharex=ax0)
    ax3 = stepsFigDet.add_subplot(414, sharex=ax0)

    ax0.set_ylabel('temp')
    ax1.set_ylabel('grad')
    ax2.set_ylabel('pcRad')
    ax3.set_ylabel('pcLen')

    for i in range(nwalkers):
        ax0.plot(sampler.chain[i, :, tempIdx], "b", alpha=0.3)   # temp
        ax1.plot(sampler.chain[i, :, gradIdx], "b", alpha=0.3)   # grad
        ax2.plot(sampler.chain[i, :, pcRadIdx], "b", alpha=0.3)  # pcrad
        ax3.plot(sampler.chain[i, :, pcLenIdx], "b", alpha=0.3)  # pclen

    plt.savefig("emcee_detchain_%dwfs.png" % numWaveforms)

    # and for the transfer function
    stepsFigTF = plt.figure(4, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(411)
    tf1 = stepsFigTF.add_subplot(412, sharex=ax0)
    tf2 = stepsFigTF.add_subplot(413, sharex=ax0)
    tf3 = stepsFigTF.add_subplot(414, sharex=ax0)
    tf0.set_ylabel('zero_1')
    tf1.set_ylabel('pole_1')
    tf2.set_ylabel('pole_real')
    tf3.set_ylabel('pole_imag')

    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, -4], "b", alpha=0.3)  # zero_1
        tf1.plot(sampler.chain[i, :, -3], "b", alpha=0.3)  # pole_1
        tf2.plot(sampler.chain[i, :, -2], "b", alpha=0.3)  # pole_real
        tf3.plot(sampler.chain[i, :, -1], "b", alpha=0.3)  # pole_imag

    plt.savefig("emcee_tfchain_%dwfs.png" % numWaveforms)

    samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim))

    print "temp is %f" % np.median(samples[:, tempIdx])
    print "grad is %f" % np.median(samples[:, gradIdx])
    print "pcrad is %f" % np.median(samples[:, pcRadIdx])
    print "pclen is %f" % np.median(samples[:, pcLenIdx])
    print "zero_1 is %f" % np.median(samples[:, -4])
    print "pole_1 is %f" % np.median(samples[:, -3])
    print "pole_real is %f" % np.median(samples[:, -2])
    print "pole_imag is %f" % np.median(samples[:, -1])

    # TODO: Aaaaaaand plot some waveforms..
    simWfs = np.empty((wfPlotNumber, numWaveforms, fitSamples))

    for idx, (theta) in enumerate(samples[np.random.randint(
            len(samples), size=wfPlotNumber)]):
        temp, impGrad, pcRad, pcLen = (theta[tempIdx], theta[gradIdx],
                                       theta[pcRadIdx], theta[pcLenIdx])
        zero_1, pole_1, pole_real, pole_imag = theta[-4:]
        r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = \
            theta[:-8].reshape((6, numWaveforms))
        det.SetTemperature(temp)
        det.SetFields(pcRad, pcLen, impGrad)

        zeros = [zero_1, 0]
        poles = [pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1]
        det.SetTransferFunction(zeros, poles, 1E7)

        for wf_idx in range(wfs.size):
            wf_i = det.GetSimWaveform(r_arr[wf_idx], phi_arr[wf_idx],
                                      z_arr[wf_idx], scale_arr[wf_idx],
                                      t0_arr[wf_idx], fitSamples)
            simWfs[idx, wf_idx, :] = wf_i
            if wf_i is None:
                print "Waveform %d, %d is None" % (idx, wf_idx)

    residFig = plt.figure(4, figsize=(20, 15))
    helpers.plotManyResidual(simWfs, wfs, figure=residFig)
    plt.savefig("emcee_waveforms_%dwfs.png" % numWaveforms)
# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
    pbar.start()
    for t in range(n_iter_per_epoch):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an image.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / M
    print("log p(x) >= {:0.3f}".format(avg_loss))

# Visualize hidden representations.
time = odata_pt[:, 1]
tindex = abs(time - pf.current_time.v).argmin()

if args.subsample >= 0 and pf.h.max_level - args.undersample < args.subsample:
    print 'ERROR: Subsample must be less than max refine level - undersample.'
    sys.exit()

maxval = np.empty(len(args.vars))
minval = np.empty(len(args.vars))
maxval.fill(-float("inf"))
minval.fill(float("inf"))
vals = list()

pbar = ProgressBar(widgets=['Determining histogram bounds and initial pass of data: ',
                            Percentage(), Bar(), ' ', ETA()],
                   maxval=len(pf.index.grids)).start()
for cnt, g in enumerate(pf.index.grids):
    if g.Level > pf.h.max_level - args.undersample:
        continue
    if len(g.Children) != 0 and g.Level != pf.h.max_level - args.undersample:
        continue
    evals = list()
    vvals = list()
    #vvals = g.get_data(args.var).ravel()
    for e, ev in enumerate(args.vars):
        vvals.append(g[ev].ravel())
    dvals = g["dens"].ravel().v * g["o16 "].ravel().v
def main(arguments):
    """
    Main method of the file.

    Args:
        arguments: ArgumentParser object which contains user-configurable
            parameters. For more info, look into parseArguments() method.

    Returns:
        None
    """
    # Download data, if asked by user.
    if arguments.download_data:
        # Create data folder, if it doesn't exist.
        if not os.path.exists(os.path.join("data")):
            os.makedirs(os.path.join("data"))

        # Define URL to get data from,
        # and the zip folder path where the data will be stored.
        url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
        zipFolder = os.path.join("data", "tiny-imagenet-200.zip")

        # Define a method which will unzip the downloaded zip folder.
        def unzip():
            print("Unzipping data...")
            zipFile = ZipFile(zipFolder, "r")
            uncompressedSize = sum(file.file_size for file in zipFile.infolist())
            extractedSize = 0
            pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=100).start()
            start = datetime.datetime.now()
            for file in zipFile.infolist():
                extractedSize += file.file_size
                percent = extractedSize * 100 / uncompressedSize
                pbar.update(percent)
                zipFile.extract(file, path=zipFolder.rsplit(".", 1)[0])
            print("Unzipped in {} s.".format(
                (datetime.datetime.now() - start).seconds))
            zipFile.close()

        # Proceed to download only if the tiny imagenet folder does not already exist.
        if not os.path.exists(os.path.join("data", "tiny-imagenet-200")):
            # Proceed to download only if the downloaded zip folder does not exist,
            # else directly unzip the previously downloaded zip file.
            if not os.path.isfile(zipFolder):
                print("Retrieving dataset from web...")
                pbar = ProgressBar(widgets=[Percentage(), Bar()],
                                   maxval=100).start()

                def dlProgress(count, blockSize, totalSize):
                    percent = int(count * blockSize * 100 / totalSize)
                    pbar.update(percent)

                start = datetime.datetime.now()
                urlretrieve(url, zipFolder, reporthook=dlProgress)
                print("Downloaded in {} s.".format(
                    (datetime.datetime.now() - start).seconds))
            unzip()
            reconfigureValSet(arguments.data_dir)
        else:
            print("Dataset folder already exists.")

    # Define image parameters.
    imgWidth = 64          # width of image
    imgHeight = 64         # height of image
    imgChannels = 3        # channels of image, RGB
    lenTrainData = 100000  # total number of training data
    lenValData = 10000     # total number of validation data
    classes = 200          # number of classes

    # Define image shape.
    if K.image_data_format() == "channels_first":
        imgShape = (imgChannels, imgWidth, imgHeight)
    else:
        imgShape = (imgWidth, imgHeight, imgChannels)

    # Create Model.
    network = arguments.model
    if network == "vgg16":
        batchSize = 256
        numEpochs = 2000  # 74 epochs used in the original paper
        learningRate = 0.01
        from models import vgg16
        model = vgg16.createNetwork(imgShape, classes, learningRate,
                                    arguments.pretrained, arguments.init)
    elif network == "resnet50":
        batchSize = 256
        numEpochs = 2000  # 60e4 iterations in the original paper
        learningRate = 0.1
        from models import resnet50
        model = resnet50.createNetwork(imgShape, classes, learningRate)
    elif network == "ext-resnet41":
        batchSize = 256
        numEpochs = 2000
        learningRate = 0.1
        from models import ext_resnet41
        model = ext_resnet41.createNetwork(imgShape, classes, learningRate)
    elif network == "ext-resnet50":
        batchSize = 256
        numEpochs = 2000
        learningRate = arguments.lr
        from models import ext_resnet50
        model = ext_resnet50.createNetwork(imgShape, classes, learningRate,
                                           arguments.activation, arguments.init,
                                           arguments.loss, arguments.do,
                                           arguments.kernel_size,
                                           arguments.kernel_number)
    elif network == "ext-resnet62":
        batchSize = 256
        numEpochs = 2000
        learningRate = 0.1
        from models import ext_resnet62
        model = ext_resnet62.createNetwork(imgShape, classes, learningRate)

    # Set featurewise mean of dataset.
    if K.image_data_format() == "channels_first":
        featurewiseMean = np.array([0.485, 0.456, 0.406],
                                   dtype=np.float32).reshape(3, 1, 1)
        featurewiseStd = np.array([0.229, 0.224, 0.225],
                                  dtype=np.float32).reshape(3, 1, 1)
    else:
        featurewiseMean = np.array([0.485, 0.456, 0.406],
                                   dtype=np.float32).reshape(1, 1, 3)
        featurewiseStd = np.array([0.229, 0.224, 0.225],
                                  dtype=np.float32).reshape(1, 1, 3)

    # Create training data generator.
    if arguments.data_aug == "no":
        trainDataGen = ImageDataGenerator(featurewise_center=True)
    elif arguments.data_aug == "yes":
        trainDataGen = ImageDataGenerator(featurewise_center=True,
                                          horizontal_flip=True,
                                          vertical_flip=True,
                                          rotation_range=20,
                                          width_shift_range=0.2,
                                          height_shift_range=0.2)
        trainDataGen.std = featurewiseStd
    else:
        trainDataGen = ImageDataGenerator(featurewise_center=True,
                                          horizontal_flip=True,
                                          vertical_flip=True)
    trainDataGen.mean = featurewiseMean
    trainGen = trainDataGen.flow_from_directory(
        os.path.join(arguments.data_dir, "train"),
        target_size=(imgWidth, imgHeight),
        batch_size=batchSize)

    # Create validation data generator.
    if arguments.reconfigure_val:
        reconfigureValSet(arguments.data_dir)
    valDataGen = ImageDataGenerator(featurewise_center=True)
    valDataGen.mean = featurewiseMean
    if arguments.data_aug == "yes":
        valDataGen.std = featurewiseStd
    valGen = valDataGen.flow_from_directory(
        os.path.join(arguments.data_dir, "val"),
        target_size=(imgWidth, imgHeight),
        batch_size=batchSize)

    # Create directories to store output.
    if arguments.pretrained:
        network = network + "-imagenet"
    if not os.path.exists(os.path.join("output", network)):
        os.makedirs(os.path.join("output", network))
    name = arguments.name.replace(" ", "_")
    if not os.path.exists(os.path.join("output", network, name)):
        os.makedirs(os.path.join("output", network, name))
    outputTime = str(datetime.datetime.now()).replace(" ", "_").replace(":", ".")
    if not os.path.exists(os.path.join("output", network, name, outputTime)):
        os.makedirs(os.path.join("output", network, name, outputTime))
    if not os.path.exists(os.path.join("weights", network, name)):
        os.makedirs(os.path.join("weights", network, name))

    # Create callbacks.
    tbCallback = TensorBoard(
        log_dir="./logs/{}/{}/{}".format(network, name, outputTime),
        histogram_freq=0, write_grads=True, write_graph=True)
    lrCallback = ReduceLROnPlateau(monitor="val_categorical_accuracy",
                                   patience=5, factor=0.1, verbose=1,
                                   min_lr=0.00001)
    esCallback = EarlyStopping(monitor="val_categorical_accuracy",
                               patience=10, verbose=1, min_delta=0.0001)

    # Fit model to the dataset.
    model.fit_generator(trainGen,
                        steps_per_epoch=lenTrainData // batchSize,
                        epochs=numEpochs,
                        validation_data=valGen,
                        validation_steps=lenValData // batchSize,
                        callbacks=[tbCallback, lrCallback, esCallback])

    # Save the model weights.
    model.save_weights(os.path.join("weights", network, name,
                                    outputTime + ".h5"))
def doScript():
    VERBOSE = False

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner())
    parser.add_argument("--bake", action="store_true",
                        help="Compile templates and SASS (yum!)")
    parser.add_argument("--clean", action="store_true",
                        help="Clean up the mess (mom would be proud!) [Selected when no options are given]")
    parser.add_argument("--controls", action="store_true",
                        help="Generate slide control files (gonna have something already baked)")
    parser.add_argument("--controlsonly", action="store_true",
                        help="Only generate control files")
    parser.add_argument("--dev", action="store_true",
                        help="Use the quick-bake test kitchen environment (no screenshots, no packaging). "
                             "This is a shortcut to using go --clean --watch --veev2rel")
    parser.add_argument("--init", action="store_true",
                        help="Initialize a new VELVEEVA project")
    parser.add_argument("--nuke", action="store_true",
                        help="Nuke old builds and temp files")
    parser.add_argument("--nobake", action="store_true",
                        help="Don't bake it...")
    parser.add_argument("--package", action="store_true",
                        help="Wrap it up [Selected when no options are given]")
    parser.add_argument("--packageonly", action="store_true",
                        help="Just wrap it up (you gotta already have something baked)")
    parser.add_argument("--publish", action="store_true",
                        help="Ship it off to market")
    parser.add_argument("--publishonly", action="store_true",
                        help="(Only) ship it off to market (you gotta already have something baked, "
                             "and control files generated)")
    parser.add_argument("--relink", action="store_true",
                        help="Make some href sausage (replace relative links with global "
                             "and convert to veeva: protocol)")
    parser.add_argument("--screenshots", action="store_true",
                        help="Include Screenshots [Selected when no options are given]")
    parser.add_argument("--veev2rel", action="store_true",
                        help="Convert veeva: hrefs to relative links")
    parser.add_argument("--verbose", action="store_true", help="Chatty Cathy")
    parser.add_argument("--watch", action="store_true",
                        help="Watch for changes and re-bake on change")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()
    config = parse_config()

    SOURCE_DIR = config['MAIN']['source_dir']
    DEST_DIR = config['MAIN']['output_dir']
    GLOBALS_DIR = config['MAIN']['globals_dir']
    PARTIALS_DIR = config['MAIN']['partials_dir']
    TEMPLATES_DIR = config['MAIN']['templates_dir']
    ZIPS_DIR = config['MAIN']['zips_dir']

    ### CTL File Info ###
    CTLS_DIR = config['MAIN']['ctls_dir']
    VEEVA_USERNAME = config['VEEVA']['username']
    VEEVA_PASSWORD = config['VEEVA']['password']
    VEEVA_SERVER = config['VEEVA']['server']
    VEEVA_EMAIL = config['VEEVA'].get('email', None)

    ROOT_DIR = os.getcwd()
    CONFIG_FILE_NAME = "VELVEEVA-config.json"
    PROJECT_NAME = config['MAIN']['name']
    VELVEEVA_DIR = os.path.dirname(os.path.abspath(inspect.stack()[0][1]))

    print(banner())
    print("%s\n" % paint.bold.yellow(PROJECT_NAME))

    try:
        with ProgressBar(max_value=11,
                         widgets=[Bar(), Percentage()],
                         redirect_stdout=True) as progress:
            # 0. nuke
            print(paint.gray("Nuking old builds..."))
            progress.update(1)
            nuke(ROOT_DIR, config)

            # 1. scaffold needed folders
            print(paint.gray("Creating directories..."))
            progress.update(2)
            scaffold(ROOT_DIR, config)

            # 2. inline local (non-html) files, and create build folders
            print(paint.gray("Inlining partials and globals..."))
            progress.update(3)
            copy_locals(ROOT_DIR, SOURCE_DIR, DEST_DIR)

            # 3. inline partials and globals
            progress.update(4)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "inject.py")
            for out in execute(["python3", cmd, ROOT_DIR, GLOBALS_DIR, DEST_DIR]):
                print(out)

            # 4. render sass
            print(paint.gray("Compiling SASS..."))
            progress.update(5)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "compile_sass.py")
            for out in execute(["python3", cmd, os.path.join(ROOT_DIR, DEST_DIR)]):
                print(out)

            # 5. render templates
            print(paint.gray("Rendering templates..."))
            progress.update(6)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "render_templates.py")
            for out in execute(["python3", cmd,
                                os.path.join(ROOT_DIR, SOURCE_DIR),
                                os.path.join(ROOT_DIR, DEST_DIR),
                                os.path.join(ROOT_DIR, TEMPLATES_DIR),
                                os.path.join(ROOT_DIR, PARTIALS_DIR)]):
                print(out)

            # 6. take screenshots
            print(paint.gray("Taking screenshots..."))
            progress.update(7)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "screenshot.py")
            src = os.path.abspath(os.path.join(ROOT_DIR, DEST_DIR))
            cfg = os.path.abspath(os.path.join(ROOT_DIR, CONFIG_FILE_NAME))
            for out in execute(["python3", cmd, src, cfg]):
                print(out)

            # 7. package slides
            progress.update(8)
            print(paint.gray("Packaging slides..."))
            cmd = os.path.join(VELVEEVA_DIR, "lib", "package_slides.py")
            for out in execute(["python3", cmd,
                                os.path.join(ROOT_DIR, DEST_DIR),
                                os.path.join(ROOT_DIR, DEST_DIR, ZIPS_DIR)]):
                print(out)

            # 8. generate control files
            print(paint.gray("Generating .ctl files..."))
            progress.update(9)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "genctls.py")
            flags = ["python3", cmd,
                     "--root", ROOT_DIR,
                     "--src", os.path.abspath(os.path.join(ROOT_DIR, DEST_DIR, ZIPS_DIR)),
                     "--out", os.path.abspath(os.path.join(ROOT_DIR, DEST_DIR, CTLS_DIR)),
                     "--u", VEEVA_USERNAME,
                     "--pwd", VEEVA_PASSWORD]
            if VEEVA_EMAIL is not None:
                flags = flags + ["--email", VEEVA_EMAIL]
            for out in execute(flags):
                print(out)

            # 9. ftp
            print(paint.gray("Publishing to Veeva FTP server..."))
            progress.update(10)
            cmd = os.path.join(VELVEEVA_DIR, "lib", "publish.py")
            for out in execute(["python3", cmd,
                                "--zip", os.path.abspath(os.path.join(ROOT_DIR, DEST_DIR, ZIPS_DIR)),
                                "--ctl", os.path.abspath(os.path.join(ROOT_DIR, DEST_DIR, CTLS_DIR)),
                                "--host", VEEVA_SERVER,
                                "--u", VEEVA_USERNAME,
                                "--pwd", VEEVA_PASSWORD]):
                print(out)

            # done!
            progress.update(11)

        # TODO:
        # - relinking
        # - don't use subprocess -> import directly
        # - concurrent build
        # - better exception handling
        # - file watcher architecture
        # - all utils should use python argparse and --src SRC (e.g.) flags,
        #   not strictly positional arguments
        # - make flags required (so it fails if they are not present)
        # - unified banner printer

    except Exception as e:
        print(paint.bold.red("\nthere was an error:"))
        print(e)
        sys.exit(1)

    print(paint.bold.green("\nYum!"))
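doScript streams the output of each helper script through an execute generator whose definition is not shown in this file. A common shape for such a helper, sketched here under the assumption that it simply yields stdout lines and raises on a non-zero exit (the real implementation may differ):

import subprocess

def execute(cmd):
    """Run cmd and yield its stdout line by line (hypothetical sketch;
    the actual helper used by doScript is defined elsewhere)."""
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    for line in iter(proc.stdout.readline, ''):
        yield line.rstrip('\n')
    proc.stdout.close()
    if proc.wait() != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)

Yielding lines instead of returning the full output is what lets the build loop print progress as each subprocess runs, which matters with redirect_stdout=True on the progress bar.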
import os

import numpy as np

from osgeo import gdal, osr

import dem  # assumed local module providing DEMGrid, used below
from s3utils import save_tiff
from progressbar import Bar, Percentage, ProgressBar, ETA

if __name__ == "__main__":
    results_dir = '/media/rmsare/GALLIUMOS/ot_results/ot-ncal/'
    working_dir = results_dir + 'masked/'

    files = os.listdir(results_dir)
    files = [f for f in files if 'tif' in f]

    pbar = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA()],
                       maxval=len(files))
    pbar.start()

    for i, f in enumerate(files):
        tile_name = f[0:10]
        data_dir = '/media/rmsare/GALLIUMOS/data/ot_data/tif/2m/'

        # mask is True wherever the 2 m DEM tile has no data
        data = dem.DEMGrid(data_dir + tile_name + '.tif')
        mask = np.isnan(data._griddata)

        inraster = gdal.Open(results_dir + f)
        transform = inraster.GetGeoTransform()
        nbands = inraster.RasterCount
        ncols = inraster.RasterXSize
        nrows = inraster.RasterYSize
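The excerpt ends before the mask is applied. As an illustration only, not the original continuation, the per-band masking and write-out could look like this with plain GDAL, assuming the results raster is float-typed, aligned cell-for-cell with the DEM tile, and that working_dir already exists (save_tiff from s3utils is not used here because its signature is not shown above):

        # Hypothetical continuation: copy the raster and blank out masked cells.
        driver = gdal.GetDriverByName('GTiff')
        outraster = driver.CreateCopy(working_dir + f, inraster)
        for b in range(1, nbands + 1):
            band = outraster.GetRasterBand(b)
            arr = band.ReadAsArray().astype(np.float32)
            arr[mask] = np.nan  # assumes mask.shape == (nrows, ncols)
            band.WriteArray(arr)
        outraster.FlushCache()
        pbar.update(i + 1)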
def search_whoosh_files(filename_in):
    ngt1 = RegexTokenizer() | NgramFilter(4)
    l_aux_i = 0
    filename_aux = "dataset_match_" + filename_in
    ix1 = open_dir("index_" + filename_in)

    # approximate upper bound for the progress bar
    max_val = 3000000

    widgets = ['Progress Searching ' + filename_in + ': ', Percentage(), ' ',
               Bar(marker='0', left='[', right=']'), ' ', ETA(), ' ']
    pbar = ProgressBar(widgets=widgets, maxval=max_val)
    pbar.start()

    with ix1.searcher() as searcher:
        parser = MultifieldParser(['title'], ix1.schema)
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.remove_plugin_class(qparser.PlusMinusPlugin)

        with open("dataset_non_match_" + filename_in + ".csv_tmp", 'w',
                  encoding="utf-8") as inW2:
            with open("dataset_match_" + filename_in + ".csv",
                      encoding="utf8") as csvfile:
                for row in csvfile:
                    l_aux_i = l_aux_i + 1
                    if l_aux_i % 20000 == 0:
                        print("Index search " + str(l_aux_i))
                        pbar.update(l_aux_i)

                    l_row_idx = row.split('|')[0]
                    l_row_aux = row.split('|')[1]

                    # query the index with one random 4-gram from the row text
                    search_list = [token.text for token in ngt1(l_row_aux)]
                    if len(search_list) > 0:
                        l_row_str = random.sample(search_list, 1)
                        query = parser.parse(l_row_str[0])
                        results = searcher.search(query)

                        # keep hits other than the row itself
                        results_aux = []
                        for result in results:
                            if result['id'] != l_row_idx:
                                results_aux.append([result['id'], result['title']])

                        # write up to three distinct non-matches for this row
                        if len(results_aux) > 0:
                            shuffle(results_aux)
                            line_new = (l_row_idx + "|" + l_row_aux + "|" +
                                        results_aux[0][0] + "|" + results_aux[0][1])
                            inW2.write(line_new.strip() + '\n')
                        if len(results_aux) > 1:
                            if results_aux[1][0] != results_aux[0][0]:
                                line_new = (l_row_idx + "|" + l_row_aux + "|" +
                                            results_aux[1][0] + "|" + results_aux[1][1])
                                inW2.write(line_new.strip() + '\n')
                        if len(results_aux) > 2:
                            if results_aux[2][0] != results_aux[1][0]:
                                line_new = (l_row_idx + "|" + l_row_aux + "|" +
                                            results_aux[2][0] + "|" + results_aux[2][1])
                                inW2.write(line_new.strip() + '\n')
    pbar.finish()
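search_whoosh_files assumes an existing Whoosh index directory named "index_<name>" with stored id and title fields, since it reads result['id'] and result['title'] from search hits. A sketch of how such an index could be built (the schema details are an assumption; only the field names are taken from the code above, and "jrc_person" is one of the dataset names the function mentions):

import os

from whoosh.fields import Schema, ID, TEXT
from whoosh.index import create_in

schema = Schema(id=ID(stored=True), title=TEXT(stored=True))
os.makedirs("index_jrc_person", exist_ok=True)
ix = create_in("index_jrc_person", schema)

writer = ix.writer()
writer.add_document(id="1", title="John Smith")  # hypothetical record
writer.commit()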
def consolidate():
    """
    Converts previous archive data model to new one.
    """
    session = Session()
    try:
        log.verbose('Checking archive size ...')
        count = session.query(ArchiveEntry).count()
        log.verbose('Found %i items to migrate, this can be aborted with CTRL-C safely.' % count)

        # consolidate old data
        from progressbar import ProgressBar, Percentage, Bar, ETA

        widgets = ['Process - ', ETA(), ' ', Percentage(), ' ',
                   Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=count).start()

        # id's for duplicates
        duplicates = []

        for index, orig in enumerate(session.query(ArchiveEntry).yield_per(5)):
            bar.update(index)

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                log.info('Database looks like it has already been consolidated, '
                         'item %s already has sources ...' % orig.title)
                session.rollback()
                return

            # add legacy task to the sources list
            orig.sources.append(get_source(orig.task, session))
            # remove task, deprecated .. well, let's still keep it ..
            #orig.task = None

            for dupe in session.query(ArchiveEntry).\
                    filter(ArchiveEntry.id != orig.id).\
                    filter(ArchiveEntry.title == orig.title).\
                    filter(ArchiveEntry.url == orig.url).all():
                orig.sources.append(get_source(dupe.task, session))
                duplicates.append(dupe.id)

        if duplicates:
            log.info('Consolidated %i items, removing duplicates ...' % len(duplicates))
            for id in duplicates:
                session.query(ArchiveEntry).filter(ArchiveEntry.id == id).delete()

        session.commit()
        log.info('Completed! This does NOT need to be run again.')
    except KeyboardInterrupt:
        session.rollback()
        log.critical('Aborted, no changes saved')
    finally:
        session.close()
def do(controller):
    """Calculate buried surface area contributions for ligands in PDB assemblies."""
    # get the controller command
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(OEAtomHasIntData(('entity_type_bm', 0)),
                                  OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=['PDB entries: ', SimpleProgress(), ' ',
                                   Percentage(), Bar()],
                          maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(), pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(struct_data_dir,
                                          'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(surface_areas_path) and getsize(surface_areas_path) > 0:
            continue
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() - (args.update * 60 * 60 * 24)
              and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent "
                         "than {1} days. Skipped.".format(pdb, args.update))
            continue

        # output file stream and CSV writer
        atomfs = open(surface_areas_path, 'w')
        atomwriter = csv.writer(atomfs, dialect='tabs')

        # deal with each found assembly separately
        # some pdb entries consist of more than one
        for assembly in assemblyset:
            if args.quat:
                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly)
            else:
                app.log.error("the calculation of buried ligand surface areas "
                              "is only supported for quaternary structures.")
                sys.exit(1)

            if not os.path.isfile(path):
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} does not exist!".format(path))

            # get the quaternary structure
            ifs.open(str(path))

            try:
                assembly = ifs.GetOEGraphMols().next()
            except StopIteration:
                assembly = None

            if not assembly:
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} does not contain a valid molecule!".format(path))
                continue

            if not assembly.GetListData('ligands'):
                continue

            # identifier of the assembly
            assembly_serial = assembly.GetIntData('assembly_serial')

            # remove all non-polymers from assembly
            for atom in assembly.GetAtoms(nonpolymers):
                assembly.DeleteAtom(atom)

            # ignore bizarre assemblies
            if not assembly.NumAtoms():
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} contains assembly with no atoms!".format(path))
                continue

            # keep only the location state with the largest average occupancy
            assembly_hi_occ = oechem.OEGraphMol()
            altlocfactory = oechem.OEAltLocationFactory(assembly)
            altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

            # get the ligands
            ligands = assembly_hi_occ.GetListData('ligands')

            # iterate through all ligands of the biomolecule and calculate the buried
            # surface area atom contributions for all involved atoms
            for ligand in ligands:
                # ignore small ligands
                if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                    continue

                entity_serial = ligand.GetIntData('entity_serial')

                # keep only the location state with the largest average occupancy
                altlig = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(ligand)
                altlocfactory.MakeCurrentAltMol(altlig)

                cmplx_srf = oespicoli.OESurface()
                ligand_srf = oespicoli.OESurface()

                # make solvent-accessible surface of ligand
                oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                # get the atom contributions of the ligand surface
                ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                # extract the binding site of the assembly to speed up surface
                # area calculation
                binding_site = get_binding_site(assembly_hi_occ, altlig)

                # make solvent-accessible surface of binding site
                binding_site_srf = oespicoli.OESurface()
                oespicoli.OEMakeAccessibleSurface(binding_site_srf, binding_site, 0.5, 1.4)

                # get the atom contributions of the binding site surface
                binding_site_atom_areas = get_atom_surface_areas(binding_site,
                                                                 binding_site_srf)

                # create complex
                cmplx = oechem.OEGraphMol()
                oechem.OEAddMols(cmplx, binding_site)
                oechem.OEAddMols(cmplx, altlig)

                # make solvent-accessible surface of the complex
                oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                # surface area atom contributions of the whole complex
                cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                ## extract the atom surface areas in the bound state through slices
                binding_site_atom_areas_bound = cmplx_atom_areas[:binding_site.NumAtoms()]
                ligand_atom_areas_bound = cmplx_atom_areas[binding_site.NumAtoms():]

                # difference between apo and bound state per polymer atom
                binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                # boolean map indicating for which atom the surface area has changed
                binding_site_atom_map = binding_site_delta != 0
                ligand_atom_map = ligand_delta != 0

                if args.dry_run:
                    continue

                # only record the atoms where the solvent-accessible surface
                # area has actually changed
                write_atoms(atomwriter, binding_site, binding_site_atom_map, pdb,
                            assembly_serial, entity_serial, binding_site_atom_areas,
                            binding_site_atom_areas_bound)
                write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                            assembly_serial, entity_serial, ligand_atom_areas,
                            ligand_atom_areas_bound)

            app.log.debug("wrote buried surface areas for all ligands in "
                          "biomolecule {} to {}.".format(pdb, surface_areas_path))

        atomfs.flush()
        atomfs.close()

    if args.progressbar:
        bar.finish()
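The slicing above works because OEAddMols appends the ligand's atoms after the binding-site atoms, so the per-atom areas of the complex can be split at binding_site.NumAtoms(). A toy numpy trace of the apo-versus-bound bookkeeping, with invented numbers rather than OpenEye output:

import numpy as np

# per-atom solvent-accessible areas computed on each molecule alone
binding_site_atom_areas = np.array([12.0, 0.0, 7.5])  # 3 site atoms (toy values)
ligand_atom_areas = np.array([20.0, 5.0])             # 2 ligand atoms (toy values)

# per-atom areas recomputed on the combined complex, in the same atom order
cmplx_atom_areas = np.array([10.0, 0.0, 7.5, 11.0, 5.0])

n_site = 3  # binding_site.NumAtoms()
site_bound = cmplx_atom_areas[:n_site]
lig_bound = cmplx_atom_areas[n_site:]

# atoms whose accessible area shrank on binding are the ones worth recording
site_changed = (binding_site_atom_areas - site_bound) != 0  # [True, False, False]
lig_changed = (ligand_atom_areas - lig_bound) != 0          # [True, False]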
import os
import sys

from argparse import ArgumentParser
from collections import defaultdict

from progressbar import ProgressBar, Bar

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('abstract')
    parser.add_argument('-o', '--output', default='data-08-2018')
    args = parser.parse_args()

    abstracts = defaultdict(str)
    titles = defaultdict(str)

    with open(args.abstract) as absfile:
        for line in absfile:
            id, doi, title, abstract = line.split('\t', 3)
            titles[id] = title
            # store the abstract text itself (trailing newline stripped)
            abstracts[id] = abstract.rstrip('\n')

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    pbar = ProgressBar(widgets=[Bar()])
    for key in pbar(abstracts.keys()):
        filename = os.path.join(args.output, key)
        with open(filename, 'w') as outfile:
            outfile.write(titles[key] + '\n')
            outfile.write(abstracts[key] + '\n')
            outfile.write(' \n')
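For reference, a hypothetical tab-separated input line of the form

1234	10.1000/xyz123	An Example Title	Text of the abstract...

produces a file named 1234 under the output directory whose first line is the title and whose second line is the abstract text.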
import io

from collections import defaultdict
from collections import OrderedDict

import pandas as pd
import matplotlib.pyplot as plt

from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, \
    FileTransferSpeed, FormatLabel, Percentage, \
    ProgressBar, ReverseBar, RotatingMarker, \
    SimpleProgress, Timer

# progress bar settings
widgets = ['Progress: ', Percentage(), ' ',
           Bar(marker=RotatingMarker()), ' ', ETA()]


def getTaggedMovies():
    """Return the set of movie ids that have at least one tag."""
    tagged_movies = set()
    with io.open("datasets/ml-latest-small/folksonomy.csv", "r",
                 encoding="ISO-8859-1") as file:
        for line in file:
            tokens = line.strip().split("\t")
            movieid = int(tokens[0], 10)
            tagged_movies.add(movieid)
    return tagged_movies
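The widgets list above is defined for later use; a sketch of how it would plug into a ProgressBar, for example while iterating over the tagged movies (the per-movie work is a placeholder):

tagged = getTaggedMovies()
pbar = ProgressBar(widgets=widgets, maxval=len(tagged)).start()
for i, movieid in enumerate(sorted(tagged)):
    # ... per-movie processing would go here ...
    pbar.update(i + 1)
pbar.finish()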
import io
import os
import re

from progressbar import Bar, ETA, Percentage, ProgressBar
from tika import parser


def main(pdfdir, textdir):
    dirlist = [fn for fn in os.listdir(pdfdir) if fn.endswith('.pdf')]

    print('Extracting text, using Tika, from %d files in %s.' %
          (len(dirlist), pdfdir))
    print(' Writing output text files to %s.' % textdir)

    if not os.path.exists(textdir):
        os.mkdir(textdir)

    widgets = ['Files (of %d): ' % len(dirlist), Percentage(), ' ',
               Bar('='), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=len(dirlist)).start()

    for (i, fn) in enumerate(dirlist):
        pbar.update(i)

        parsed = parser.from_file(pdfdir + '/' + fn)
        try:
            if parsed['content'] is None:
                print('Tika found no content in %s.' % fn)
                continue
        except Exception:
            print('Tika could not parse %s.' % fn)
            continue

        with io.open(textdir + '/' + fn[0:-4] + '.txt', 'w',
                     encoding='utf8') as outf:
            cleaned = parsed['content']

            # Translate some UTF-8 punctuation to ASCII
            punc = {
                0x2018: 0x27, 0x2019: 0x27,  # single quotes
                0x201C: 0x22, 0x201D: 0x22,  # double quotes
                0x2010: 0x2d, 0x2011: 0x2d, 0x2012: 0x2d, 0x2013: 0x2d,  # hyphens
                0xF0B0: 0xb0,  # degree
                0xFF0C: 0x2c,  # comma
                0x00A0: 0x20,  # space
                0x2219: 0x2e, 0x2022: 0x2e,  # bullets
            }
            # 0x005E:0x5e, 0x02C6:0x5e, 0x0302:0x5e, 0x2038:0x5e,  # carets
            # 0x00B0:0x6f, 0x02DA:0x6f,  # degree
            # 0x00B9:0x31, 0x00B2:0x32, 0x00B3:0x33,  # exponents
            cleaned = cleaned.translate(punc)

            # Replace newlines that separate words with a space (unless hyphen)
            cleaned = re.sub(r'([^\s-])[\r|\n]+([^\s])', '\\1 \\2', cleaned)

            # Remove hyphenation at the end of lines
            # (this is sometimes bad, as with "Fe-\nrich")
            cleaned = cleaned.replace('-\n', '\n')

            # Remove all newlines
            cleaned = re.sub(r'[\r|\n]+', '', cleaned)

            # Remove xxxx.PDF
            cleaned = re.sub(r'([0-9][0-9][0-9][0-9].PDF)', '', cleaned,
                             flags=re.IGNORECASE)

            # And "xx(th|st) Lunar and Planetary Science Conference ((19|20)xx)"
            # with optional parentheses, optional LPI contrib
            cleaned = re.sub(
                r'([0-9][0-9].. Lunar and Planetary Science Conference \(?(19|20)[0-9][0-9]\)?)'
                r'( \(LPI Contrib. No. [0-9][0-9][0-9][0-9]\))? ?',
                '', cleaned, flags=re.IGNORECASE)

            # And "Lunar and Planetary Science XXXIII (2002)"
            # with Roman numeral and optional year
            cleaned = re.sub(
                r'(Lunar and Planetary Science [CDILVXM]+( \((19|20)[0-9][0-9]\))?) ?',
                '', cleaned, flags=re.IGNORECASE)

            # Remove mailto: links
            cleaned = re.sub(r'mailto:[^\s]+', '', cleaned)

            outf.write(cleaned)

    pbar.finish()
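The three newline rules interact in a fixed order: join word-separating newlines, then drop end-of-line hyphenation, then strip what remains. A toy trace, for illustration only:

sample = 'mineral compo-\nsition of the\nsample'
step1 = re.sub(r'([^\s-])[\r|\n]+([^\s])', '\\1 \\2', sample)
# -> 'mineral compo-\nsition of the sample' (the hyphen blocks the first join)
step2 = step1.replace('-\n', '\n')
# -> 'mineral compo\nsition of the sample' (hyphenation removed)
step3 = re.sub(r'[\r|\n]+', '', step2)
print(step3)  # -> 'mineral composition of the sample'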