Example #1
0
def normalise_image_size_and_bbox():
    """Resize every annotated training image to one square size and rescale its box.

    The target side length is read from the pickle file at ``MIN_DIM``. For
    each image named in the JSON at ``BBOX_JSON_PATH`` the function:

    * resizes the image from ``RAW_TRAIN_IMAGES_PATH`` to ``dim x dim`` and
      writes it to ``NORMALISED_IMAGES_PATH``;
    * scales the two stored corner points by the same x/y factors;
    * writes a copy with the scaled rectangle drawn on it to
      ``NORMALISED_BBOX_IMAGES_PATH``.

    The scaled boxes are finally written to ``NORM_BBOX_JSON_PATH`` as
    ``{image_name: [tc_x, tc_y, bl_x, bl_y]}``. Images that OpenCV cannot
    read are logged with a warning and left out of the output.
    """
    create_dir(NORMALISED_IMAGES_PATH)
    create_dir(NORMALISED_BBOX_IMAGES_PATH)
    LOGGER.debug('Normalising image sizes to same size')
    # Context manager so the handle is closed even if unpickling fails.
    # NOTE(review): pickle must only be used on trusted, locally produced files.
    with open(MIN_DIM, 'rb') as fp:
        dim = pickle.load(fp)
    LOGGER.debug('Normalising all images to %d x %d', dim, dim)
    bbox_json = read_json_file(BBOX_JSON_PATH)
    norm_bbox_json = {}
    for image, corners in bbox_json.items():
        img = cv2.imread(RAW_TRAIN_IMAGES_PATH + image)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None.
            LOGGER.warning('Could not read image %s, skipping', image)
            continue
        y_, x_ = img.shape[:2]
        img = cv2.resize(img, (dim, dim))
        cv2.imwrite(NORMALISED_IMAGES_PATH + image, img)
        x_scale = dim / x_
        y_scale = dim / y_
        tc_x = int(np.round(corners[0][0] * x_scale))
        tc_y = int(np.round(corners[0][1] * y_scale))
        bl_x = int(np.round(corners[1][0] * x_scale))
        bl_y = int(np.round(corners[1][1] * y_scale))
        norm_bbox_json[image] = [tc_x, tc_y, bl_x, bl_y]

        # The clean image is already on disk, so drawing on `img` is safe here.
        im_bbox = cv2.rectangle(img, (tc_x, tc_y), (bl_x, bl_y), (255, 0, 0),
                                2)
        cv2.imwrite(NORMALISED_BBOX_IMAGES_PATH + image, im_bbox)

    write_json_to_file(norm_bbox_json, NORM_BBOX_JSON_PATH)
Example #2
0
def store_parameter(product_number, tester, req_file, golden_file, dut_file, output_dir):
    """
    Record the run parameters in module-level globals and create the result directory.

    The result directory is ``<output_dir>/result_<YYYYmmdd_HHMMSS>``; when
    ``output_dir`` is empty the current working directory is used instead.

    Arguments:
        product_number - product number of DUT
        tester - tester of DB check
        req_file - requirement file
        golden_file - golden file
        dut_file - dut file
        output_dir - output directory
    """
    global g_product_number, g_tester
    global g_req_file, g_golden_file, g_dut_file
    global g_output_dir
    global result_path

    g_product_number, g_tester = product_number, tester
    g_req_file, g_golden_file, g_dut_file = req_file, golden_file, dut_file
    # Fall back to the working directory when no output directory was given.
    g_output_dir = output_dir if output_dir else os.getcwd()

    timestamp = time.strftime("_%Y%m%d_%H%M%S")
    result_path = os.path.join(g_output_dir, "result" + timestamp)
    logger().info("result_path= %s" % result_path)
    uti.create_dir(result_path)
Example #3
0
    def model_inference(self):
        """
        Run the detector on every image in TEST_IMAGES_PATH and save annotated copies.

        For each image:
            1. get the anchors whose top class probability exceeds 0.8 (indexes)
            2. get the dims of those anchors using the index
            3. add the predicted deltas to the anchors to get candidate boxes
            4. scale the anchor boxes by multiplying by VGG_SCALE_SIZE
            5. reshape each region to its diagonal corners
            6. run non-max suppression and draw the kept boxes

        Output images are written to TEST_OUTPUT_PATH under the input file name.
        Images that yield no candidate box are skipped.
        """
        abox = AnchorBox()
        create_dir(TEST_OUTPUT_PATH)
        image_list = os.listdir(TEST_IMAGES_PATH)
        # Loop-invariant helpers, built once instead of once per anchor.
        softmax = Softmax(dim=0)
        scale = torch.tensor([VGG_SCALE_SIZE]).float()
        LOGGER.info('Inference begun')
        for image in image_list:
            path_of_image = TEST_IMAGES_PATH + '/' + image
            img = cv2.resize(cv2.imread(path_of_image),
                             (self.image_size, self.image_size))
            img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
            img = img.to(self.device)

            pred_cls, pred_reg = self.forward(img)
            pred_cls = pred_cls.detach().cpu()
            pred_reg = pred_reg.detach().cpu()

            # Boxes and scores must be collected per image: if boxes from
            # earlier images were kept, they would no longer line up with
            # this image's scores when handed to non-max suppression.
            predicted_diagonals = []
            scores = []

            for anchor_idx, cls_out in enumerate(pred_cls[0]):
                top_prob = torch.max(softmax(cls_out))
                if not top_prob > 0.8:
                    continue
                scores.append(top_prob.item())

                delta_xywh = pred_reg[0][anchor_idx]
                anchor_xywh = abox.anchor_boxes[anchor_idx]
                proposed_region = torch.mul(
                    torch.add(delta_xywh, anchor_xywh), scale)
                diag = get_diagonal_from_mpwh(proposed_region.tolist())
                predicted_diagonals.append(
                    [diag[0][0], diag[0][1], diag[1][0], diag[1][1]])

            LOGGER.debug('Length of predicted_diagonals: %s',
                         str(len(predicted_diagonals)))
            if not predicted_diagonals:
                # Nothing passed the threshold; there is nothing to suppress or draw.
                continue
            boxes = torch.tensor(predicted_diagonals)
            scores = torch.tensor(scores)
            keep, max_count = non_max_suppress(boxes, scores)

            # NOTE(review): the image is reloaded for every kept box and the
            # same output file is overwritten, so only the last kept box ends
            # up in the saved image - confirm whether that is intended.
            for idx in range(max_count):
                box = boxes[keep[idx].item()]
                # OpenCV expects plain integer pixel coordinates.
                x1, y1, x2, y2 = (int(v) for v in box.tolist())
                img = cv2.imread(path_of_image)
                img = cv2.resize(img, (self.image_size, self.image_size))
                im_bbox = cv2.rectangle(img, (x1, y1), (x2, y2),
                                        (255, 0, 0), 2)
                cv2.imwrite(TEST_OUTPUT_PATH + image, im_bbox)
Example #4
0
def main():
    """
    Max Ent Model on fMRI data

    Runs the whole analysis pipeline: averages the signal over groups of
    brain areas, binarizes it, collects empirical pattern statistics and
    compares them with the model evaluated from fixed lambda parameters.
    All helpers receive the output directory taken from the command line.
    """
    args = parse_args()
    input_path = args.input
    output_path = args.output
    brain_areas = args.areas
    threshold = args.threshold

    # NOTE(review): the value read from args.areas is replaced by this
    # hard-coded grouping right away - confirm whether that is intended.
    brain_areas = [
        [21, 3, 19, 24, 16, 20, 18, 17, 62,
         40, 45, 46, 41, 49, 44, 48, 47, 25],
        [36, 50, 15, 29],
        [51, 58, 34, 60, 56, 55, 14, 7, 10, 9],
        [2, 12, 8, 5, 13, 11, 4, 63, 53, 54, 57, 52, 61],
        [37, 39, 59, 38, 6, 26, 28, 27],
        [30, 35, 23, 22, 32, 31, 0, 1, 64, 42, 43, 33, 65],
    ]

    # Fixed model parameters, keyed lambda1 .. lambda22.
    lambdas_dict = dict(
        lambda1=-4.37389,
        lambda2=-0.0181369,
        lambda3=0.0434916,
        lambda4=0.0138453,
        lambda5=-0.182505,
        lambda6=0.0141578,
        lambda7=0.0448107,
        lambda8=-0.143712,
        lambda9=-0.0357837,
        lambda10=-0.0992271,
        lambda11=-0.0599827,
        lambda12=-0.194269,
        lambda13=-0.0807454,
        lambda14=-0.0203112,
        lambda15=0.205356,
        lambda16=-0.0932409,
        lambda17=-0.286996,
        lambda18=-0.0666362,
        lambda19=0.047215,
        lambda20=-0.0219382,
        lambda21=0.0555467,
        lambda22=-0.417749,
    )

    create_dir(output_path)

    # Empirical side of the analysis.
    area_means = mean_over_areas(input_path, brain_areas, output_path)
    binarized = binrize(area_means, threshold)
    _unique, _count, patterns, count_norm = number_of_patterns(binarized)
    entropy_of_distribution_data(count_norm, output_path)
    sequence_count_plot(patterns, count_norm, output_path)
    areas_average = average_activity(binarized, output_path)
    empirical_pairwise_activation(binarized, output_path)
    pairwise_covar(binarized, areas_average, output_path)

    # Model side of the analysis.
    probabilities, probas_list = calculate_prob_max_ent(lambdas_dict,
                                                        output_path)
    sequence_count_plot_model(patterns, count_norm, probabilities, output_path)
    entropy_model(probas_list, output_path)
Example #5
0
def preform_lle_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic connectivity of brain areas with performing
    a locally linear embedding returning its matrix.

    For every subject and time point the phase-coherence matrix
    ``cos(d)`` is built, where ``d`` is the absolute phase difference between
    two areas, replaced by ``2*pi - d`` when it exceeds ``pi``. Each matrix
    is saved under ``<output_path>/dFC``, embedded into two dimensions with
    a 12-neighbour LLE, and the reconstruction error is dumped to a
    ``LLE_error_<subject>_<time>`` JSON file in ``output_path``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: LLE matrix, LLE matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    # The first file fixes the number of time points used for every subject.
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    lle_components = np.zeros((n_subjects, t_phases, brain_areas * 2),
                              dtype=np.float64)
    # The output directory is loop-invariant: create it once up front.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)
    # Explicit row indices keep the IndexError if `phases` has too few rows.
    area_rows = np.arange(brain_areas)
    for n in tqdm(range(n_subjects)):
        phases = np.asarray(
            convert_to_phases(paths[n], output_path, brain_areas, t_phases,
                              n))
        for t in range(t_phases):
            phase_t = phases[area_rows, t]
            # Pairwise absolute phase differences via broadcasting replace
            # the explicit double loop over areas.
            diff = np.absolute(phase_t[:, np.newaxis] -
                               phase_t[np.newaxis, :])
            dFC = np.cos(np.where(diff > np.pi, 2 * np.pi - diff,
                                  diff)).astype(np.float64)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
            lle, err = manifold.locally_linear_embedding(dFC,
                                                         n_neighbors=12,
                                                         n_components=2)
            with open(
                    os.path.join(output_path, 'LLE_error_{}_{}'.format(n, t)),
                    'w') as output:
                json.dump(err, output)
            lle_components[n, t, :] = lle.flatten()
    # save the LLE matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), lle_components)
    return lle_components, lle_components.shape
def main():
    """
    DFC features

    Splits the cluster labels by task using the ``starts`` JSON, copies each
    task's dFC matrices into one folder per cluster label, then averages the
    matrices in every cluster folder, saves the average as ``averaged_dfc``
    and plots it.
    """
    args = parse_args()
    input_path = args.input
    output_path = args.output
    starts_json = args.starts
    clusters_file = args.clusters
    brain_areas = args.features

    # Load labels and starts json and divide labels by tasks into separate
    # folders
    labels = np.load(clusters_file)['arr_0']
    with open(starts_json) as s:
        starts = json.load(s)
    cluster_paths = set()
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for key, values in tqdm(starts.items()):
        task_output = os.path.join(output_path, key, 'dFC_out')
        create_dir(task_output)
        task_labels = labels[values[0]:values[1], :]
        dFC_paths = return_paths_list(os.path.join(input_path, key, 'dFC'),
                                      output_path, '.npz')
        np.savez(os.path.join(task_output, 'labels_{}'.format(key)),
                 task_labels)

        # Tasks labels divide dFCs according to labels into states folders.
        # enumerate gives the row index directly instead of a linear
        # list.index() search per file.
        for position, dfc_path in enumerate(dFC_paths):
            cluster = task_labels[position][-1]
            cluster_output = os.path.join(task_output, str(cluster))
            create_dir(cluster_output)
            cluster_paths.add(cluster_output)
            shutil.copyfile(
                dfc_path,
                os.path.join(cluster_output, os.path.basename(dfc_path)))

    # Average for clusters and visualise; sorted for a reproducible order.
    for c in tqdm(sorted(cluster_paths)):
        matrix_paths = return_paths_list(c, output_path, '.npz')
        stacked = np.zeros((len(matrix_paths), brain_areas, brain_areas),
                           dtype=np.float64)
        for i, matrix_path in enumerate(matrix_paths):
            stacked[i, :, :] = np.load(matrix_path)['arr_0']
        averaged = np.average(stacked, 0)  # Average over all matrices in a cluster
        np.savez(os.path.join(c, 'averaged_dfc'), averaged)
        plot_dfc_areas_correlation(averaged, c)
Example #7
0
def get_img_bbox_coors(limit=9999999999999999999):
    """Map image file names to their bounding-box corner coordinates.

    Walks ``IMAGES`` and, for every entry whose file name is in
    ``IMAGES_IN_DIR``, stores the two coordinates returned by
    ``get_coors_from_annotation_by_id``. Stops once ``limit`` images have
    been collected and writes the mapping to ``BBOX_JSON_PATH``.
    """
    create_dir(BBOX_IMAGES_PATH)
    LOGGER.debug('Getting image map with coordinates for Bounding Box')
    coords_by_name = {}
    collected = 0
    for entry in IMAGES:
        if entry['file_name'] in IMAGES_IN_DIR:
            tc_coor, bl_coor = get_coors_from_annotation_by_id(entry['id'])
            coords_by_name[entry['file_name']] = [tc_coor, bl_coor]
            print(coords_by_name)
            collected += 1
        # Checked on every entry, matched or not.
        if collected >= limit:
            break
    write_json_to_file(coords_by_name, BBOX_JSON_PATH)
def main(args):
    """Parse every raw data file listed in the config into a CSV file.

    The CSV files are written to ``<config dir.data>/<config name>/<name>.csv``;
    an entry's optional ``old_format`` flag is forwarded to ``parse`` and
    defaults to ``False``.
    """
    print('Reading config')
    config = utilities.read_json_config(args.config, utilities.Task.parse)
    print('Starting parsing...')
    output_dir = '{}/{}'.format(config['dir']['data'], config['name'])
    print('Creating data directory {}'.format(output_dir))
    utilities.create_dir(output_dir)
    print('Reading raw data...')
    progress = tqdm(range(len(config['data'])))
    for position in progress:
        entry = config['data'][position]
        source = entry['file']
        target = '{}/{}.csv'.format(output_dir, entry['name'])
        progress.set_description(
            'Processing raw_data in={} out={}'.format(source, target))
        parse(source, target, entry.get('old_format', False))
Example #9
0
def save_report():
    """
    Write the report document to ``DbCheckReport.docx`` in the result directory.

    The result directory is created first when it does not exist. If the
    file cannot be written because of a PermissionError the error is logged
    and the process exits with status 1.
    """
    global result_path
    global report_file
    if not os.path.isdir(result_path):
        uti.create_dir(result_path)

    report_file = os.path.join(result_path, "DbCheckReport.docx")
    try:
        document.save(report_file)
    except PermissionError:
        logger().exception('Report file has been opened by another program.')
        os.sys.exit(1)
    # Only reached when the save succeeded (the handler above exits).
    logger().info("%s is saved successfully." % os.path.abspath(report_file))
Example #10
0
def main(args):
    """Train, test and export every predictor described by the config.

    Results go to ``<config dir.output>/<config name>/train/<type>`` with one
    sub-directory each for CDF data, gnuplot files, plots and saved models.
    """
    print('Reading config...')
    config = utilities.read_json_config(args.config, utilities.Task.train)
    train_type = str(args.type)

    print('Preparing output directory...')
    output_dir = '{}/{}/train/{}'.format(config['dir']['output'], config['name'], train_type)
    utilities.create_dir(output_dir)

    print('Preparing dataset...')
    dataset = prepare_dataset(config, train_type, get_data_col(args.type))

    print('Preparing trainers...')
    trainers = prepare_trainer(config)
    print('There are {} models that needs to be trained'.format(len(trainers)))
    print()

    print('Training predictors...')
    predictors = train_predictor(config, trainers, dataset)
    print(predictors)
    print()

    print('Test predictors...')
    # The return value is not used here.
    test_predictor(predictors, dataset)

    print('Preparing other output dirs')
    cdf_dir = '{}/cdf'.format(output_dir)
    gnuplot_dir = '{}/gnuplot'.format(output_dir)
    plot_dir = '{}/plot'.format(output_dir)
    model_dir = '{}/model'.format(output_dir)
    for directory in (cdf_dir, gnuplot_dir, plot_dir, model_dir):
        utilities.create_dir(directory)

    print('Generate diff and plots...')

    progress = tqdm(predictors)
    for name in progress:
        progress.set_description('Generate diffs for {}'.format(name))
        diff = generate_diff(config, predictors, name, dataset)

        progress.set_description('Saving diffs for {}'.format(name))
        sorted_indexes = save_diff(config, cdf_dir, name, diff)

        progress.set_description('Creating plot for {}'.format(name))
        save_plot(config, cdf_dir, gnuplot_dir, plot_dir, name, diff, sorted_indexes)

        progress.set_description('Saving model for {}'.format(name))
        utilities.save('{}/{}.joblib'.format(model_dir, name), predictors[name])
Example #11
0
def dynamic_functional_connectivity(input_path, output_path, brain_areas,
                                    pattern):
    """
    Computes the dynamic functional connectivity of brain areas.

    For every subject and time point the matrix ``cos(d)`` is built, where
    ``d`` is the absolute phase difference between two areas, replaced by
    ``2*pi - d`` when it exceeds ``pi``. Each matrix is saved as
    ``subject_<n>_time_<t>.npz`` under ``<output_path>/dFC``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: dFC output path
    :rtype: str
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    # The first file fixes the number of time points used for every subject.
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]

    # Created once up front: the path is loop-invariant, and the return
    # value is then defined even when no time point is processed.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)
    # Explicit row indices keep the IndexError if `phases` has too few rows.
    area_rows = np.arange(brain_areas)

    for n in tqdm(range(n_subjects)):
        phases = np.asarray(
            convert_to_phases(paths[n], output_path, brain_areas, t_phases,
                              n))
        for t in range(t_phases):
            phase_t = phases[area_rows, t]
            # Pairwise absolute phase differences via broadcasting replace
            # the explicit double loop over areas.
            diff = np.absolute(phase_t[:, np.newaxis] -
                               phase_t[np.newaxis, :])
            dFC = np.cos(np.where(diff > np.pi, 2 * np.pi - diff,
                                  diff)).astype(np.float64)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)

    return dfc_output
Example #12
0
 def model_inference(self):
     """Predict one box per test image and save the image with the box drawn.

     Each file in TEST_IMAGES_PATH is resized to the model's square input
     size and passed through ``forward``. The predicted midpoint, width and
     height are converted to diagonal corners, and the annotated image is
     written to TEST_OUTPUT_PATH under the same file name.
     """
     create_dir(TEST_OUTPUT_PATH)
     image_list = os.listdir(TEST_IMAGES_PATH)
     LOGGER.info('Inference begun')
     for image in image_list:
         path_of_image = TEST_IMAGES_PATH + '/' + image
         pixels = cv2.resize(cv2.imread(path_of_image),
                             (self.image_size, self.image_size))
         # HWC image array -> 1 x C x H x W float tensor on the model device.
         batch = torch.tensor(pixels).float().permute(2, 0, 1).unsqueeze(0)
         batch = batch.to(self.device)
         predicted_width, predicted_height, predicted_midpoint = self.forward(
             batch)
         box_mpwh = [
             predicted_midpoint[0][0].detach().cpu().item(),
             predicted_midpoint[0][1].detach().cpu().item(),
             predicted_width[0].detach().cpu().item(),
             predicted_height[0].detach().cpu().item(),
         ]
         diag = get_diagonal_from_mpwh(box_mpwh)
         # A fresh copy of the image is loaded for drawing.
         canvas = cv2.resize(cv2.imread(path_of_image),
                             (self.image_size, self.image_size))
         im_bbox = cv2.rectangle(canvas, diag[0], diag[1], (255, 0, 0), 2)
         cv2.imwrite(TEST_OUTPUT_PATH + image, im_bbox)
Example #13
0
 def save(self):
     """Write this element to the report result directory and return its path.

     Tables (``self.etype == self.TABLE``) are written as comma-separated
     rows of ``self.t_data`` to a ``.csv`` file; anything else saves the
     current matplotlib figure as a ``.png``. The file name is the title
     (with ``/`` and ``:`` replaced) plus a timestamp. The absolute path is
     stored in ``self.ref`` and returned.
     """
     # TODO(review): the original note said "Maybe delete previous ref file".
     if not os.path.isdir(report.result_path):
         uti.create_dir(report.result_path)
     suffix = time.strftime("%Y%m%d_%H%M%S")
     safe_title = self.title.replace('/', '_').replace(':', '-')
     is_table = self.etype == self.TABLE
     extension = 'csv' if is_table else 'png'
     # os.path.join instead of a hard-coded '\\' so the file also lands
     # inside the result directory on non-Windows platforms.
     filename = os.path.abspath(
         os.path.join(report.result_path,
                      '{}_{}.{}'.format(safe_title, suffix, extension)))
     if is_table:
         with open(filename, "w") as file:
             for row in self.t_data:
                 file.write(','.join(row) + "\n")
     else:
         plt.gcf().savefig(filename)
         logger().debug("Figure of command(%s) is saved as:%s" %
                        (self.title, filename))
     self.ref = filename
     return filename
Example #14
0
def main(args):
    """Evaluate the saved predictors on every configured dataset.

    Loads the four predictors named in the config, then for each dataset
    runs the test, generates and saves the diffs and creates its plot.
    A combined plot is saved at the end. Output goes to
    ``<config dir.output>/<config name>/inference``.
    """
    print('Reading config...')
    config = utilities.read_json_config(args.config, utilities.Task.inference)

    print('Preparing output directory...')
    output_dir = '{}/{}/inference'.format(config['dir']['output'],
                                          config['name'])
    utilities.create_dir(output_dir)

    print('Preparing dataset...')
    datasets = prepare_dataset(config, COMBINED_COL)

    print('Preparing predictors...')
    model_config = config['model']
    main_predictor = utilities.load(model_config['main']['file'])
    stringtable_predictor = utilities.load(model_config['stringtable']['file'])
    prune_predictor = utilities.load(model_config['prune']['file'])
    otyrt_predictor = utilities.load(model_config['otyrt']['file'])
    predictor_set = (main_predictor, stringtable_predictor, prune_predictor,
                     otyrt_predictor)

    print('Preparing other output dirs')
    cdf_dir = '{}/cdf'.format(output_dir)
    gnuplot_dir = '{}/gnuplot'.format(output_dir)
    plot_dir = '{}/plot'.format(output_dir)
    for directory in (cdf_dir, gnuplot_dir, plot_dir):
        utilities.create_dir(directory)

    progress = tqdm(range(len(datasets)))
    for idx in progress:
        data_entry = config['data'][idx]
        name = data_entry['name']

        progress.set_description(
            'Outputting performance metrics for dataset {}'.format(name))
        # The metrics are computed but not used further here.
        _mse, _r2 = test_predictor(datasets[idx], *predictor_set)

        progress.set_description(
            'Generating diffs for dataset {}'.format(name))
        diff = generate_diff(datasets[idx], *predictor_set)

        progress.set_description(
            'Saving diffs for dataset {} prediction'.format(name))
        sorted_indexes = save_diff(config, cdf_dir, data_entry['name'], diff)

        progress.set_description(
            'Creating plot for database {} prediction'.format(name))
        save_plot(config['model'], data_entry, cdf_dir, gnuplot_dir, plot_dir,
                  diff, sorted_indexes)

    print('Saving combined plot')
    save_plots(config, cdf_dir, gnuplot_dir, plot_dir)
 def thumbnail(session, db_object, db_manager, parent=None):
     """Download the thumbnail of ``db_object`` and record its local path.

     The session is flushed first. Objects without an id are ignored. The
     target is ``<download_path>/[<parent>-<id>/]<title>-<id>/thumbnail.jpg``.
     When ``utils.dir_exists`` reports that the target is absent, the parent
     directory is created, the image is downloaded, the local path is set on
     the object and the session is committed (rolled back on error).
     """
     db_manager.flush(session)
     if not db_object.id:
         return

     if parent:
         thumbnail_path = '{}/{}-{}/{}-{}/thumbnail.jpg'.format(
             CronJobManager.download_path, parent.title, parent.id,
             db_object.title, db_object.id)
     else:
         thumbnail_path = '{}/{}-{}/thumbnail.jpg'.format(
             CronJobManager.download_path, db_object.title,
             db_object.id)

     if utils.dir_exists(thumbnail_path):
         return

     # Everything before the last '/' is the directory holding the file.
     utils.create_dir(thumbnail_path.rsplit('/', 1)[0])
     CronJobManager.__instance.dmm_ripper.download_image(
         db_object.thumbnail_dmm, thumbnail_path)
     CronJobManager.logger.info('Storing thubnail in %s',
                                thumbnail_path)
     db_object.thumbnail_local = thumbnail_path
     try:
         db_manager.commit(session)
     except Exception:
         CronJobManager.logger.exception(
             "Error updating local thubnail's path for: %s", db_object.title)
         db_manager.rollback(session)
Example #16
0
    def run(self):
        """Validate the runner settings and execute the runlist with Teuthology.

        Raises:
            RunfileError: the runlist in use does not exist.
            RunnerError: the target file is missing or invalid, the bat type
                is not in ``self.bat_list``, or ``set_number`` is missing for
                a non-bvt run.
        """
        if self.runlist:
            self.use_runlist = self.runlist
        else:
            self.default_runlist()

        # check whether the runlist in use exists
        if not utilities.is_file_valid(self.use_runlist):
            raise RunfileError("runlist %s does not exist" %
                               self.use_runlist)

        # check whether the target file was passed and exists
        if (not self.targetfile) or \
           (not utilities.is_file_valid(self.targetfile)):
            raise RunnerError("Target file %s is not passed or does not exist" % (self.targetfile))
        # check that the bat type is a supported one; the offending value is
        # now interpolated into the message instead of a literal "%s"
        if self.bat not in self.bat_list:
            raise RunnerError("Invalid bat type %s, provide bvt, ceph_qa" %
                              self.bat)
        # check that set_number is provided; it is not mandatory for bvt
        if self.bat != 'bvt':
            if not self.set_number:
                raise RunnerError("Please pass set_number with -s option")
        # create the log folder if it does not exist yet
        self.log_dir_inuse = self.default_log_path_dict.get(self.bat)
        if not utilities.is_path(self.log_dir_inuse):
            utilities.create_dir(self.log_dir_inuse)

        # start executing the tests present in the runlist
        obj = teuthology.Teuthology(targetfile=self.targetfile,
                                    bat_type=self.bat,
                                    set_number=self.set_number,
                                    report_result=self.report_result,
                                    runlist_path=self.use_runlist,
                                    log_path=self.log_dir_inuse,
                                    nuke=self.debug,  mail=self.mail,
                                    no_poweroff=self.no_poweroff)
        obj.run()
Example #17
0
    def model_train(self, epoch_offset=0, lamda=10, nreg=2400, ncls=256):
        """Train the model on the images listed in BBOX_XYWH_JSON_PATH.

        Uses SGD with a StepLR schedule, NLLLoss on log-softmax class outputs
        and SmoothL1Loss on the regression outputs. One optimizer step is
        taken per image. The per-image loss is
        ``cls_loss / ncls + reg_loss * lamda / nreg``.

        :param epoch_offset: added to the epoch number in log messages and
            checkpoint file names only; the loop always runs ``EPOCHS`` epochs
        :param lamda: weight of the regression loss
        :param nreg: divisor of the regression loss
        :param ncls: divisor of the classification loss
        """
        LOGGER.info('Started Training with an offset of %s', str(epoch_offset))
        create_dir(MODEL_SAVE_PATH)
        optimizer = SGD(self.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
        scheduler = StepLR(optimizer,
                           step_size=SCHEDULER_STEP,
                           gamma=SCHEDULER_GAMMA)
        LOGGER.info(
            'Learning Rate: %s, Momentum: %s, Scheduler_step: %s, scheduler_gamma: %s',
            str(LEARNING_RATE), str(MOMENTUM), str(SCHEDULER_STEP),
            str(SCHEDULER_GAMMA))
        loss_for_classification = NLLLoss()
        loss_for_regression = SmoothL1Loss()

        img_coors_json = read_json_file(BBOX_XYWH_JSON_PATH)

        # Per-image anchor assignments are computed once, before training.
        # They are stored positionally, so the lookups below rely on
        # img_coors_json.items() iterating in the same order every time.
        anchor_box = AnchorBox()
        all_background_index = []
        all_foreground_index = []
        all_reg_tensor = []
        for image, coors in img_coors_json.items():
            li_fore_index, li_back_index, reg_ten_actual = \
                anchor_box.calculate_p_for_each_anchor_box(anchor_box.anchor_boxes, coors)
            all_background_index.append(li_back_index)
            all_foreground_index.append(li_fore_index)
            all_reg_tensor.append(reg_ten_actual)

        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            # NOTE(review): the scheduler is stepped (with an explicit epoch)
            # before any optimizer step - check against the PyTorch version
            # in use, newer releases warn about this order.
            scheduler.step(epoch)
            LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                         str(epoch + epoch_offset), str(scheduler.get_lr()))
            count = 0
            for image, coors in img_coors_json.items():
                img = cv2.imread(NORMALISED_IMAGES_PATH + image)
                # HWC image array -> 1 x C x H x W float tensor.
                img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
                img = img.to(self.device)
                pred_cls, pred_reg = self.forward(img)
                # `count` is the position of this image in the precomputed lists.
                li_foreground_index = all_foreground_index[count]
                li_background_index = all_background_index[count]
                reg_tensor_actual = all_reg_tensor[count]
                count = count + 1

                # Gather the predictions of the foreground anchors. Each
                # accumulator starts with one dummy zero row so torch.cat has
                # something to append to; that row is sliced off afterwards.
                exp_torch_fg_bg = []
                pred_torch_fg = torch.zeros(1, pred_cls.shape[2])
                pred_torch_fg = pred_torch_fg.to(self.device)
                pred_torch_reg = torch.zeros(1, pred_reg.shape[2])
                pred_torch_reg = pred_torch_reg.to(self.device)
                for idx_foreground in li_foreground_index:
                    exp_torch_fg_bg.append(1)  # expected class label: foreground
                    pred_torch_fg = torch.cat(
                        (pred_torch_fg,
                         pred_cls[0][idx_foreground].unsqueeze(0)),
                        dim=0)
                    pred_torch_reg = torch.cat(
                        (pred_torch_reg,
                         pred_reg[0][idx_foreground].unsqueeze(0)),
                        dim=0)
                pred_torch_fg = pred_torch_fg[1:]
                pred_torch_reg = pred_torch_reg[1:]

                # Same gathering for the background anchors (class only).
                pred_torch_bg = torch.zeros(1, pred_cls.shape[2])
                pred_torch_bg = pred_torch_bg.to(self.device)
                for idx_background in li_background_index:
                    exp_torch_fg_bg.append(0)  # expected class label: background
                    pred_torch_bg = torch.cat(
                        (pred_torch_bg,
                         pred_cls[0][idx_background].unsqueeze(0)),
                        dim=0)
                pred_torch_bg = pred_torch_bg[1:]

                # Foreground rows first, then background rows, matching the
                # order in which the labels were appended above.
                pred_cls_only_background_foreground = torch.cat(
                    (pred_torch_fg, pred_torch_bg), dim=0)
                # NLLLoss expects log-probabilities.
                pred_cls_only_background_foreground = LogSoftmax(dim=1).\
                    forward(pred_cls_only_background_foreground)

                exp_torch_fg_bg = torch.tensor(exp_torch_fg_bg)

                exp_torch_fg_bg = exp_torch_fg_bg.to(self.device)
                pred_cls_only_background_foreground = pred_cls_only_background_foreground.to(
                    self.device)
                reg_tensor_actual = reg_tensor_actual.to(self.device)
                pred_torch_reg = pred_torch_reg.to(self.device)
                cls_loss = loss_for_classification(
                    pred_cls_only_background_foreground, exp_torch_fg_bg)
                # NOTE(review): the target is passed first and the prediction
                # second here - confirm the argument order is intended.
                reg_loss = loss_for_regression(reg_tensor_actual,
                                               pred_torch_reg)
                total_image_loss = (cls_loss / ncls) + (reg_loss * lamda /
                                                        nreg)
                total_image_loss = total_image_loss.to(self.device)
                optimizer.zero_grad()
                total_image_loss.backward()
                optimizer.step()
                epoch_loss = epoch_loss + total_image_loss.item()
            LOGGER.debug('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                         str(epoch_loss))
            # Checkpoint file names carry the offset epoch number.
            if epoch % EPOCH_SAVE_INTERVAL == 0:
                torch.save(
                    self.state_dict(), MODEL_SAVE_PATH + 'model_epc_' +
                    str(epoch + epoch_offset) + '.pt')
            if epoch % 5 == 0:
                LOGGER.info('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                            str(epoch_loss))
Example #18
0
    def model_train(self, epoch_offset=0):
        """Train the box regressor on the images listed in BBOX_XYWH_JSON_PATH.

        Uses RMSprop with a StepLR schedule and MSELoss on the predicted
        width, height and midpoint. One optimizer step is taken per image,
        with gradient clipping before it. Checkpoints are written every
        ``EPOCH_SAVE_INTERVAL`` epochs and once more after the last epoch.

        :param epoch_offset: added to the epoch number in the learning-rate
            log message and in checkpoint file names; the loop always runs
            ``EPOCHS`` epochs
        """
        create_dir(MODEL_SAVE_PATH)
        loss_for_regression = MSELoss()
        img_coors_json = read_json_file(BBOX_XYWH_JSON_PATH)

        optimizer = RMSprop(self.parameters(),
                            lr=LEARNING_RATE,
                            momentum=MOMENTUM)
        # optimizer = Adam(self.parameters(), lr=LEARNING_RATE)
        #         optimizer = SGD(self.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

        scheduler = StepLR(optimizer,
                           step_size=SCHEDULER_STEP,
                           gamma=SCHEDULER_GAMMA)

        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            # NOTE(review): the scheduler is stepped (with an explicit epoch)
            # before any optimizer step - check against the PyTorch version
            # in use, newer releases warn about this order.
            scheduler.step(epoch)
            LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                         str(epoch + epoch_offset), str(scheduler.get_lr()))
            for image, coors in img_coors_json.items():
                path_of_image = NORMALISED_IMAGES_PATH + image
                # '%' in the path is replaced by '_' before reading.
                path_of_image = path_of_image.replace('%', '_')
                img = cv2.imread(path_of_image)
                # HWC image array -> 1 x C x H x W float tensor.
                img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
                img = img.to(self.device)
                predicted_width, predicted_height, predicted_midpoint = self.forward(
                    img)

                # Targets are read from the first entry of `coors` as
                # [mp_x, mp_y, w, h]; presumably already scaled - TODO confirm.
                mp_x = coors[0][0]
                mp_y = coors[0][1]
                mp = torch.cat((torch.tensor([[mp_x]]).to(
                    self.device), torch.tensor([[mp_y]]).to(self.device)),
                               dim=1).float()

                w = coors[0][2]
                h = coors[0][3]
                loss1 = loss_for_regression(
                    predicted_height,
                    torch.tensor([[h]]).float().to(self.device))
                loss2 = loss_for_regression(
                    predicted_width,
                    torch.tensor([[w]]).float().to(self.device))
                loss3 = loss_for_regression(predicted_midpoint,
                                            mp.to(self.device))
                # Operator precedence: only loss3 is halved.
                # NOTE(review): confirm (loss1 + loss2 + loss3) / 2 was not meant.
                loss = loss1 + loss2 + loss3 / 2
                optimizer.zero_grad()
                loss.backward()
                # Clip the gradient norm to 0.5 before the update.
                clip_grad_norm(self.parameters(), 0.5)
                optimizer.step()
                epoch_loss = epoch_loss + loss.item()

            if epoch % 5 == 0:
                print('epoch: ' + str(epoch) + ' ' + 'loss: ' +
                      str(epoch_loss))
            if epoch % EPOCH_SAVE_INTERVAL == 0:
                print('saving')
                torch.save(
                    self.state_dict(), MODEL_SAVE_PATH + 'model_epc_' +
                    str(epoch + epoch_offset) + '.pt')
        # Final checkpoint after the last epoch; `epoch` is the last loop
        # value, so this line needs EPOCHS to be at least 1.
        torch.save(
            self.state_dict(),
            MODEL_SAVE_PATH + 'model_epc_' + str(epoch + epoch_offset) + '.pt')
Example #19
0
from plotting import plot_history
from settings import DOGS_VS_CATS_DIR, CONV_NETS_DIR, DOGS_VS_CATS_WEIGHTS_DIR
from utilities import create_dir

# Dataset-size and training constants for the reduced dogs-vs-cats experiment.
# NOTE(review): the code consuming these values is outside this excerpt.
N_TRAIN_IMAGES = 3000
N_VALIDATION_IMAGES = 500
N_TEST_IMAGES = 1000
BATCH_SIZE = 64
EPOCHS = 10
USE_GENERATOR = True

# Source locations: the dataset root and its 'train' sub-directory.
original_dataset_dir = DOGS_VS_CATS_DIR
copying_dataset_dir = osp.join(DOGS_VS_CATS_DIR, 'train')
# Everything below lives under <DOGS_VS_CATS_DIR>/small_version.
# Parent directories are created before their children.
base_dir = osp.join(DOGS_VS_CATS_DIR, 'small_version')
create_dir(base_dir)

# One directory per split: train / validation / test.
train_dir = osp.join(base_dir, 'train')
create_dir(train_dir)
validation_dir = osp.join(base_dir, 'validation')
create_dir(validation_dir)
test_dir = osp.join(base_dir, 'test')
create_dir(test_dir)

# Per-class directories inside the training split.
train_cats_dir = osp.join(train_dir, 'cats')
create_dir(train_cats_dir)
train_dogs_dir = osp.join(train_dir, 'dogs')
create_dir(train_dogs_dir)

# Per-class directory inside the validation split.
validation_cats_dir = osp.join(validation_dir, 'cats')
create_dir(validation_cats_dir)
# Seed NumPy's global random generator from the command-line arguments.
np.random.seed(args.seed)
## 2. load the data
train_x, val_x, test_x, train_y, val_y, test_y, noisy_S = am_util.load_data(
    args.data_dir)
# Both arrays are unpacked as 2-D; train_num is taken from train_y last.
# NOTE(review): presumably train_x and train_y have the same number of rows.
[train_num, feature_dim] = train_x.shape
[train_num, output_dim] = train_y.shape
# int() truncates, so batch_num counts only the full batches.
batch_num = int(train_num / args.batch_size)

## 3. fix seed for graph and numpy operations
tf.reset_default_graph()
tf.set_random_seed(args.seed)

## 4.create output folders to save the log files and trained model
## ./output/model_name
# Plain string concatenation: args.output_dir must already end with a path
# separator for this to produce <output_dir>/<model_name>.
output_dir = args.output_dir + args.model_name
am_util.create_dir(output_dir)
##  ./output/model_name/logs/
am_util.create_dir(os.path.join(output_dir, "logs"))
##  ./output/model_name/models/
am_util.create_dir(os.path.join(output_dir, "models"))


## 5.define the full graph
class NNModel(object):
    def __init__(self, sess, args):
        ''' the main neural network model class '''
        #self.config = vars(args)
        self.x = tf.placeholder(tf.float32, [None, feature_dim], name="input")
        self.y_ = tf.placeholder(tf.float32, [None, output_dim], name="output")
        self.is_training = tf.placeholder(tf.bool)
        ## for the augmented data
Example #21
0
 def request_download_book(self, bot, update, book, user, user_data,
     inline_message_id):
     """Serve a book download request coming from an inline query.

     The download button of the inline message is removed first.  When
     every page and the table of contents are already in local storage,
     the book is delivered in the user's preferred format: directly, or
     through a storage URL when the file size reaches
     ``CronJobManager.max_upload_size``.  If that format has not been
     generated yet, a conversion is scheduled instead.  When pages or the
     table of contents are missing, they are downloaded, asking for the
     password first if the user does not save credentials.

     Arguments:
         bot - bot used to edit the inline message and message the user
         update - incoming update, forwarded to ``download_pages``
         book - requested book (reads ``id``, ``pages`` and ``title``)
         user - requesting user (reads ``id``, ``file_format``,
             ``language_code`` and ``save_credentials``)
         user_data - dict that receives the download context when a
             password has to be requested
         inline_message_id - id of the inline message holding the button

     Returns:
         ``self.PROCESS_PASSWORD`` when a password was requested,
         ``ConversationHandler.END`` otherwise.

     Raises:
         ValueError - the book is fully available locally but
             ``user.file_format`` is not pdf, epub or zip.
     """
     # NOTE(review): the session is neither committed nor closed here;
     # confirm that the db manager owns its lifetime.
     session = self.db_manager.create_session()
     session.add(book)
     book_path = utils.get_book_download_path(self.download_path, book)
     if not utils.dir_exists(book_path):
         utils.create_dir(book_path)
     book_images = utils.get_book_page_num_list(book_path)
     missing_images = utils.book_missing_pages(1, book.pages, book_images)
     is_toc_missing = not utils.dir_exists(path.join(book_path, 'toc.txt'))
     self.logger.info('User %s requested to download book %s',
         user.id, book.id)
     self.logger.info('Removing download button of inline query of '
         'book %s for user %s', book.id, user.id)
     bot.editMessageReplyMarkup(
         chat_id=None,
         inline_message_id=inline_message_id,
         reply_markup=None
     )
     if not missing_images and not is_toc_missing:
         self.logger.info('All the book %s pages are available in local '
             'storage', book.id)
         # Resolve the format name explicitly so that an unexpected value
         # fails with a clear error instead of an unbound local variable.
         preferred_format = None
         for candidate in (FileFormat.pdf, FileFormat.epub, FileFormat.zip):
             if user.file_format == candidate:
                 preferred_format = FileFormat(candidate).name
                 break
         if preferred_format is None:
             raise ValueError(
                 'Unsupported file format {!r} for user {}'.format(
                     user.file_format, user.id))
         file_format_path = utils.get_book_by_format(
             book_path, '.{}'.format(preferred_format.lower())
         )
         if file_format_path:
             if path.getsize(file_format_path) \
                 >= CronJobManager.max_upload_size:
                 # Too large for a direct upload: hand out a URL instead.
                 bot.send_message(chat_id=user.id,
                     text=self.lang[user.language_code]['generate_url']
                 )
                 self.scheduler.generante_storage_url(
                     file_format_path, preferred_format, bot, user
                 )
             else:
                 self.logger.info('Sending %s book transmission start '
                     'message to user %s', book.id, user.id)
                 bot.send_message(
                     chat_id=user.id,
                     text=self.lang[user.language_code]['sending_book'] \
                         .format(book.title)
                 )
                 self.logger.info('Sending book %s in %s format to '
                     'user %s', book.id, preferred_format, user.id)
                 # Close the file once the upload call has returned.
                 # NOTE(review): assumes send_document reads the file
                 # before returning (no deferred message queue).
                 with open(file_format_path, 'rb') as document:
                     bot.send_document(
                         chat_id=user.id,
                         document=document,
                         timeout=60
                     )
         else:
             self.logger.info('%s book not available in %s format', book.id,
                 preferred_format)
             self.scheduler.subscribe_to_book_conversion(book, book_path,
                 user, bot, from_download=False)
         return ConversationHandler.END
     else:
         if not user.save_credentials:
             # Stash the context so the password handler can resume.
             user_data['book'] = book
             user_data['book_path'] = book_path
             user_data['missing_images'] = missing_images
             user_data['is_toc_missing'] = is_toc_missing
             user_data['user'] = user
             self.logger.info('sending user %s password request message.',
                 user.id)
             bot.send_message(
                 user.id, self.lang[user.language_code]['request_password']
             )
             return self.PROCESS_PASSWORD
         else:
             self.download_pages(
                 bot,
                 update,
                 book_path,
                 missing_images,
                 is_toc_missing,
                 book,
                 user
             )
             return ConversationHandler.END
def main():
    """
    Dynamic functional connectivity states clustering

    For every input path a dedicated output directory is created and the
    reductions requested on the command line are run there: PCA, LLE and/or
    the dFC computation that feeds the autoencoder.  Afterwards exactly one
    clustering strategy is picked from the parsed arguments: k-means with
    mean score (on the concatenated components or on the autoencoder
    output), DBSCAN, or plain k-means on the components of the last input.
    """
    args = parse_args()
    input_paths = args.input
    pattern = args.pattern
    output_path = os.path.normpath(args.output)
    brain_areas = args.areas
    pca = args.pca
    lle = args.lle
    clusters = args.clusters
    t_phases = args.phases
    db = args.db
    autoen = args.autoen

    create_dir(output_path)

    new_outputs = []
    output_paths = []
    dfc_paths = []
    for input_path in input_paths:
        new_output = create_new_output_path(input_path, output_path)
        new_outputs.append(new_output)
        create_dir(new_output)
        output_paths.append(os.path.join(new_output, 'components_matrix.npz'))
        if pca:
            components, shape = preform_pca_on_dynamic_connectivity(
                input_path, new_output, brain_areas, pattern)
            fcd_matrix = functional_connectivity_dynamics(
                components, new_output)
            plot_functional_connectivity_matrix(fcd_matrix, new_output)

        if lle:
            components, shape = preform_lle_on_dynamic_connectivity(
                input_path, new_output, brain_areas, pattern)
            fcd_matrix = functional_connectivity_dynamics(
                components, new_output)
            # Plot into this input's own directory, like the PCA branch;
            # the shared output_path would be overwritten by every input.
            plot_functional_connectivity_matrix(fcd_matrix, new_output)

        if autoen:
            dfc_path = dynamic_functional_connectivity(input_path, new_output,
                                                       brain_areas, pattern)
            dfc_paths.append(dfc_path)

    if autoen:
        dfc_all = preprocess_autoencoder(dfc_paths, output_path, brain_areas)
        encoded = autoencoder(dfc_all, output_path)

    if clusters is not None and autoen is False:
        # concatenate all data
        concatenated = convert_components(output_paths, output_path)
        kmeans_clustering_mean_score(concatenated, output_path, clusters)
    elif clusters is not None and autoen is True:
        kmeans_clustering_mean_score(encoded, output_path, clusters)

    elif db:
        concatenated = convert_components(output_paths, output_path)
        dbscan(concatenated, output_path)

    else:
        # perform clustering on data separately
        # NOTE(review): `components` only exists when PCA or LLE ran, and it
        # holds the result of the last input path only.
        clusters = kmeans_clustering(components, output_path)
        plot_states_line(clusters, t_phases, output_path)
Example #23
0
from utilities import create_dir
from utilities import download_master_file
from utilities import read_master_file
import pandas as pd
import requests

# URL scheme prefix and file-name suffixes used when building paths.
http = 'http://'
gz = '.fastq.gz'
xml = '.xml'

# As used below: folders[0] is the master file, folders[1] the log file,
# folders[2] the fastq path prefix and folders[3] the XML path prefix.
folders = create_dir('PRJEB11419.txt', 'AG')
download_master_file('https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJEB11419&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt', folders[0])
# download_lists[0][i] is the link of sample i and download_lists[1][i] the
# identifier used for its file names and its XML record.
download_lists = read_master_file(folders[0], '\t')

# Download the fastq file and the XML metadata of every sample.
for i in range(len(download_lists[1])):

    total_link = http + str(download_lists[0][i])
    fastq_download = requests.get(total_link, allow_redirects=True)
    fastq_file_name = folders[2] + str(download_lists[1][i]) + gz
    # Context managers make sure every file is flushed and closed.
    with open(fastq_file_name, 'wb') as fastq_file:
        fastq_file.write(fastq_download.content)

    xml_location = 'https://www.ebi.ac.uk/ena/data/view/{}&display=xml'.format(download_lists[1][i])
    xml_download = requests.get(xml_location, allow_redirects=True)
    xml_file_name = folders[3] + str(download_lists[1][i]) + xml
    with open(xml_file_name, 'wb') as xml_file:
        xml_file.write(xml_download.content)
    # Append one line per sample to the log file.
    with open(folders[1], "a") as f:
        f.write("Download do fastq {} e do xml da amostra {} feita com sucesso \n".format(fastq_file_name,xml_file_name))
Example #24
0
def preform_pca_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic functional connectivity (dFC) of the brain areas
    for every subject and time point and reduces each dFC matrix with a
    two-component PCA.

    Besides returning the result, the function writes to ``output_path``:
    every dFC matrix (``dFC/subject_<n>_time_<t>.npz``), the PCA summary of
    every matrix (``PCA_results_<n>_<t>``, JSON) and the final
    ``components_matrix.npz``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: PCA matrix of shape (n_subjects, t_phases, 2 * brain_areas),
        PCA matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    # The number of time points is read from the first file only.
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    pca_components = np.zeros((n_subjects, t_phases, brain_areas * 2),
                              dtype=np.float64)
    # Loop-invariant: the dFC directory is the same for all matrices.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)
    for n in tqdm(range(n_subjects)):
        # phases is indexed as [area, time point].
        phases = np.asarray(
            convert_to_phases(paths[n], output_path, brain_areas, t_phases,
                              n),
            dtype=np.float64)
        for t in range(t_phases):
            # Absolute phase difference of all area pairs at once
            # (broadcasting a column against a row).
            phase_t = phases[:brain_areas, t]
            diff = np.absolute(phase_t[:, np.newaxis] -
                               phase_t[np.newaxis, :])
            # Differences above pi are wrapped to 2*pi - diff before the
            # cosine, exactly as in the element-wise formulation.
            dFC = np.cos(np.where(diff > np.pi, 2 * np.pi - diff, diff))
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
            pca = PCA(n_components=2)
            # normalize (the matrix saved above stays un-normalized)
            dFC = preprocessing.normalize(dFC, norm='l2')
            pca.fit(dFC)
            pca_dict = {
                'components':
                pca.components_.tolist(),
                'explained variance':
                pca.explained_variance_.tolist(),
                'explained mean variance':
                np.mean(pca.explained_variance_.tolist()),
                'explained variance ratio':
                pca.explained_variance_ratio_.tolist(),
                'mean':
                pca.mean_.tolist(),
                'n components':
                pca.n_components_,
                'noise variance':
                pca.noise_variance_.tolist()
            }
            with open(
                    os.path.join(output_path, 'PCA_results_{}_{}'.format(n,
                                                                         t)),
                    'w') as output:
                json.dump(pca_dict, output)
            # List concatenation: first component followed by the second,
            # giving a row of 2 * brain_areas values.
            pca_components[n, t, :] = \
                pca_dict['components'][0] + pca_dict['components'][1]
    # save the PCA matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), pca_components)
    return pca_components, pca_components.shape
Example #25
0
train_xx, train_yy, val_x, val_y, test_x, test_y, S_30 = am_util.load_data(args.data_dir + args.data_name + '/')
# Drop the training rows whose features are all zero, together with the
# matching rows of the targets.
zero_row = np.where(~train_xx.any(axis=1))[0]
train_x = np.delete(train_xx, (zero_row), axis=0)
train_y = np.delete(train_yy,(zero_row ), axis=0)

[train_num, feature_dim] = train_x.shape
[train_num, output_dim] = train_y.shape
# int() truncates, so batch_num counts only the full batches.
batch_num = int(train_num / args.batch_size)

## 3. fix seed for graph and numpy operations
tf.reset_default_graph()
tf.set_random_seed(args.seed)

## 4.create output folders to save the log files and trained model
#main_output_dir = ./output/data_name/
am_util.create_dir(os.path.join(args.output_dir + args.data_name)) # create folder for output
main_output_dir = args.output_dir + args.data_name
##./output/data_name/model_name/
modelname = args.model_name +'_epsilon_' + str(args.epsilon) + '_reg_param1_' + str(args.reg_param1) +  '_reg_param2_' + str(args.reg_param2)
am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname)) ## to save prediction result and log files
# Use the model name itself (not the literal text 'modelname') so that
# output_dir points at the directory created on the previous line.
output_dir = main_output_dir + '/' + modelname + '/'
##./output/data_name/model_name/models
output_model_dir = args.output_dir + args.data_name + '/'+  modelname + "/models" ## folder to save the model under output folder
am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname, "models"))  
##./output/data_name/model_name/logs
output_log_dir = args.output_dir + args.data_name + '/'+  modelname + "/logs" ## folder to save the logs under output folder
am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname, "logs"))  


## 5.define the full graph
class NNModel(object):
Example #26
0
def run_spatial(options):
    """
    Simulate runs for the SPATIAL algorithm

    Reads the school, initialization type, number of runs and algorithm
    name from ``options``. Each run builds a seeded population, improves it
    for a fixed number of iterations and, when the best solution passes
    ``check_solution``, stores it as JSON under the directory returned by
    ``create_dir('results', algo, school)``. An exception inside a run is
    printed and the next run starts.
    """
    school = options.school
    init_kind = options.initialization
    n_runs = options.runs
    algo_name = options.algo

    print('\n... Starting {} algorithm ...\n'.format(algo_name))
    # SPATIAL hyper-parameters
    population = 10
    max_iterations = 1000

    # Read data files
    args = GetInputs(school).get_inputs()

    # Weight used for F = w * F1 + (1 - w) * F2.
    # NOTE(review): 7 lies outside the [0, 1] range the formula suggests;
    # confirm how set_params interprets it.
    weight = 7
    set_params(weight, max_iterations)
    # Initialization type 3 stands for the existing configuration
    # (1: 'seeded', 2: 'infeasible', 3: 'existing').
    existing = gen_solutions(args, 3)

    print(' Present school boundary configuration has functional value : {:.3f}'.format(existing['func_val']))

    for run_idx in range(n_runs):
        run_no = run_idx + 1
        print('\n Run {} of {}\n'.format(run_no, algo_name))
        try:
            # Random seeds, one per member of the initial population
            seeds = [member + randrange(1000000)
                     for member in range(population)]
            solutions = copy.deepcopy(
                gen_solutions(args, init_kind, population, seeds))
            # Best solution of the initial population
            best_func_val, best_sol, stagnation = find_best_sol(
                solutions, math.inf, 0)
            print('Iter: 0 \t Best func_val: {:.3f}'.format(best_func_val))

            iteration = 0
            t_start = time.time()
            fval_iter = [(0, best_func_val)]    # best value every 20 iterations
            time_iter = [(0, time.time() - t_start)]

            for step in tqdm(range(max_iterations)):
                # Local improvement, then spatially-aware recombination
                for module_id in (0, 1):
                    run_module(args, module_id, solutions)
                iteration = step + 1
                best_func_val, best_sol, stagnation = find_best_sol(
                    solutions, best_func_val, stagnation)
                if iteration % 20:
                    continue
                # Report and record the best result every 20 iterations
                print('Iter: {} \t Best func_val: {:.3f}'.format(iteration, best_func_val))
                fval_iter.append((iteration, best_func_val))
                time_iter.append((iteration, time.time() - t_start))

            # Elapsed time in minutes
            t_elapsed = (time.time() - t_start) / 60.0
            best_solution = solutions[best_sol]

            print("Run: {} took {:.2f} min to execute {} iterations...\n"
                  " Obtained FuncVal: {:.3f} ".format(run_no, t_elapsed, iteration, best_func_val))

            print(' Checking the correctness of the solution...')
            if not check_solution(args, best_solution['zones']):
                print('\n Incorrect solution. Has disconnected zones... \n')
                continue

            print('\n Correct solution.. Saving results .... ')
            w1, w2, epsilon, _ = parameters()
            alg_params = get_params(w1=w1, w2=w2, epsilon=epsilon,
                                    pop_size=population)
            properties = get_params(AlgParams=alg_params,
                                    Iteration=iteration,
                                    TimeElapsed=t_elapsed,
                                    School=school)
            run_results = {
                'properties': properties,
                'info': {'Existing': existing,
                         'Final': best_solution,
                         'fval_vs_iter': fval_iter},
            }
            write_path = create_dir('results', algo_name, school)
            result_file = join(
                write_path,
                "run{}_{}_{}.json".format(run_no, algo_name, school))
            with open(result_file, 'w') as outfile:
                json.dump(run_results, outfile)

        except Exception as e:
            print('Run {} incomplete due to error: {}'.format(run_no, e))
def main():
    """
    FC states features

    Runs the variance, probability/lifetime and entropy helpers on the
    whole input. When ``args.separate`` is truthy, the same helpers run on
    every array returned by ``separate_concat_array``, every pair of those
    arrays is compared per cluster with ``students_t_test``, and the
    resulting p-values are written to ``p_values_<n_clusters>.csv``.
    """

    def _int_keys(mapping):
        # Same mapping with int() applied to every key.
        return {int(key): value for key, value in mapping.items()}

    opts = parse_args()
    input_path = opts.input
    separate = opts.separate
    output_path = opts.output
    n_clusters = opts.n_clusters
    starts_json = opts.starts
    clusters_path = opts.clusters

    create_dir(output_path)
    # Every column except the last one holds the reduced components.
    reduced_components = np.load(input_path)['arr_0'][:, :-1]
    variance = variance_of_states(reduced_components, output_path)
    labels = np.load(clusters_path)['arr_0']
    plot_variance(labels, variance, output_path)
    probabilities, _lifetimes = distribution_probability_lifetime(
        labels, output_path, n_clusters)
    entropy_of_states(probabilities, output_path, n_clusters)

    if not separate:
        return

    probas_p_values = []
    lifetimes_p_values = []
    new_paths = separate_concat_array(input_path, starts_json, output_path,
                                      n_clusters)

    # Per-task analysis, each written to a directory named after the task.
    for task_path in tqdm(new_paths):
        task_name = os.path.basename(os.path.dirname(task_path))
        task_dir = os.path.join(output_path, task_name)
        create_dir(task_dir)
        matrix = np.load(task_path)['arr_0']
        # The last column carries the cluster labels.
        task_labels = matrix[:, -1]
        task_components = matrix[:, :-1]
        task_probas, _task_lifetimes = distribution_probability_lifetime(
            task_labels, task_dir, n_clusters)
        task_var = variance_of_states(task_components, task_dir)
        plot_variance(task_labels, task_var, task_dir)
        entropy_of_states(task_probas, task_dir, n_clusters)

    # Pairwise comparison of the tasks.
    for path_a, path_b in itertools.combinations(new_paths, 2):
        group_a = np.load(path_a)['arr_0'][:, -1]
        group_b = np.load(path_b)['arr_0'][:, -1]
        a_name = os.path.basename(os.path.dirname(path_a))
        b_name = os.path.basename(os.path.dirname(path_b))
        pair_dir = os.path.join(output_path, a_name + '_' + b_name)
        create_dir(pair_dir)
        proba_a = _int_keys(probability_of_state(group_a, n_clusters,
                                                 pair_dir))
        proba_b = _int_keys(probability_of_state(group_b, n_clusters,
                                                 pair_dir))
        lt_a = _int_keys(mean_lifetime_of_state(group_a, n_clusters,
                                                pair_dir))
        lt_b = _int_keys(mean_lifetime_of_state(group_b, n_clusters,
                                                pair_dir))
        # One row per sample: the probability and the lifetime of the
        # state that sample belongs to.
        probabilities_column = ([proba_a[state] for state in group_a] +
                                [proba_b[state] for state in group_b])
        lifetimes_column = ([lt_a[state] for state in group_a] +
                            [lt_b[state] for state in group_b])
        condition_column = ([a_name] * len(group_a) +
                            [b_name] * len(group_b))
        df = pd.DataFrame(data={
            'probability': probabilities_column,
            'lifetime': lifetimes_column,
            'condition': condition_column,
            'cluster': group_a.tolist() + group_b.tolist()
        })
        df.to_csv(os.path.join(pair_dir, 'probas_lt_dataframe.csv'))
        plot_probabilities_barplots(df, pair_dir)
        plot_lifetimes_barplots(df, pair_dir)
        for cluster_id in tqdm(range(n_clusters)):
            in_cluster = df[df['cluster'] == cluster_id]
            rows_a = in_cluster[in_cluster['condition'] == a_name]
            rows_b = in_cluster[in_cluster['condition'] == b_name]
            cluster_dir = os.path.join(pair_dir, str(cluster_id))
            _t_prob, p_prob = students_t_test(
                rows_a['probability'], rows_b['probability'],
                os.path.join(cluster_dir, 'probability'))
            _t_lt, p_lt = students_t_test(
                rows_a['lifetime'], rows_b['lifetime'],
                os.path.join(cluster_dir, 'lifetime'))
            probas_p_values.append(p_prob)
            lifetimes_p_values.append(p_lt)

    p_values = pd.DataFrame({
        'probabilities_p': probas_p_values,
        'lifetimes_p': lifetimes_p_values
    })
    p_values.to_csv(
        os.path.join(output_path, 'p_values_{}.csv'.format(n_clusters)))
 def __init__(self, name):
     """Store the result of ``util.create_dir(name)`` and start the index at 1."""
     user_path = util.create_dir(name)
     self.userPath = user_path
     self.index = 1
Example #29
0
"""
    Project: Initial setup for Data Processing methods.
    Author: Goel, Ayush
    Date: 1st August 2019
"""

from os import listdir
from utilities import read_json_file, create_dir, LOGGER, MIN_DIM
from utilities.config import PROCESSED_DATA_DIR, BBOX_IMAGES_PATH, RAW_TRAIN_IMAGES_PATH, IMG_INFO_JSON_PATH, \
    NORMALISED_IMAGES_PATH, NORMALISED_BBOX_IMAGES_PATH

# Image-info JSON loaded once at import time; its 'annotations' and 'images'
# entries are exposed as module-level constants.
IMG_INFO_JSON = read_json_file(IMG_INFO_JSON_PATH)
ANNOTATIONS = IMG_INFO_JSON['annotations']
IMAGES = IMG_INFO_JSON['images']

# Names of the entries currently present in the raw training images directory.
IMAGES_IN_DIR = listdir(RAW_TRAIN_IMAGES_PATH)
# Import-time side effect: create_dir is called on the processed-data path.
create_dir(PROCESSED_DATA_DIR)
Example #30
0
from utilities import Configs
from utilities import create_dir

import data_precessor
from openpyxl import load_workbook

from urllib.request import urljoin

from os import path, sep

# Resolve the directory containing this file and derive the output directory
# 'output_excel_files' located one level above it.
dir_path = path.dirname(path.realpath(__file__))
tmp_data_dir = dir_path + sep + '../output_excel_files'
# Import-time side effect: create_dir is called on that path.
create_dir(tmp_data_dir)

# def write_in_single_file(urls):
#     tables = []
#     for filename, url in urls.items():
#         print("Extracting tables from {}".format(filename))
#         soup = load_page(url)
#         tables += soup.findAll('table')
#
#    # if tables:
#    #     data_precessor.write_tables_to_excel(tables, 'output', 'www.unknownwebsite.com')


def write_in_multiple_files(urls):
    files = []
    for filename, url in urls.items():
        print("Extracting tables from {}".format(filename))
        try:
Example #31
0
    def run(self):
        """Run every entry of the runlist and record a Pass/Fail summary.

        The runlist is parsed, a log directory named after the current date
        is created if needed, and each test is executed through
        ``run_teuthology`` with its own ``Log<count>`` suffix.  When a
        testlink object is configured, the status is reported for every
        testcase id found in the log.  The summary is written to
        ``result.log``, printed, optionally mailed, and the nodes are
        powered off unless ``no_poweroff`` is set.

        Runlist entries are either a test or a ``(name, test)`` tuple; the
        name, when given, is used as the label in the summary.

        Exits the process through ``sys.exit()`` when the runlist is empty.
        """
        self.parse_runlist()
        self.total_result = {}
        log_path = self.log_path + CURRENT_DATE
        self.result = log_path + '/' + 'result.log'
        if not utilities.is_path(log_path):
            utilities.create_dir(log_path)

        no_of_testcases = len(self.runlist_use)

        if no_of_testcases == 0:
            print("No testcases to run")
            sys.exit()

        # Execution started, start time
        start_time = time.time()
        for count, each_test in enumerate(self.runlist_use, 1):
            log_no = 'Log' + str(count)
            if self.set_number:
                log_no += "_%s" % self.set_number
            log_suffix = log_path + '/' + log_no

            # Resolve the label for THIS entry only, so a name taken from an
            # earlier tuple entry can never leak into a later plain entry.
            test_label = each_test
            if isinstance(each_test, tuple):
                test_label, each_test = each_test[0], each_test[1]

            status = self.run_teuthology(each_test, log_suffix)
            if self.tls_obj:
                testcase_IDList = self.get_testcaseID(log_suffix)
                if testcase_IDList:
                    for tcid in testcase_IDList:
                        try:
                            self.tls_obj.report_result(tcid, status,
                                                       self.ceph_version)
                        except Exception:
                            # Reporting is best effort: keep running tests.
                            print("Error while reporting Testcases: "
                                  "%s to testlink" % tcid)

            result_key = "%s. %s" % (count, test_label)
            self.total_result[result_key] = 'Pass' if status else 'Fail'
        # Execution completed, stop time

        stop_time = time.time()
        self.execution_time = int(stop_time - start_time)
        with open(self.result, 'w') as result_handle:
            result_handle.write(pprint.pformat(self.total_result))
        print(pprint.pformat(self.total_result))
        # send mail
        if self.mail:
            self.sendmail()

        # stop all the nodes
        if not self.no_poweroff:
            self.poweroff_nodes()
        return
Example #32
0
from utilities import create_dir
from utilities import download_master_file
from utilities import read_master_file
import pandas as pd
import requests

# URL scheme prefix and file-name suffixes used when building paths.
http = 'http://'
gz = '.fastq.gz'
xml = '.xml'

# As used below: folders[0] is the master file and folders[2] the fastq
# path prefix.
folders = create_dir('PRJNA422434.txt', 'T2D')
download_master_file(
    'https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA422434&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt',
    folders[0])
# download_lists[0][i] holds the ';'-separated links of sample i and
# download_lists[1][i] the identifier used for its file names.
download_lists = read_master_file(folders[0], '\t')

# Download both fastq files of every sample.
for i in range(len(download_lists[1])):

    fastq_ftp_str = download_lists[0][i]
    fastq_ftp_list = fastq_ftp_str.split(';')
    # Both links are resolved before anything is downloaded, so a sample
    # with fewer than two links fails without leaving a partial download.
    total_link_1 = http + str(fastq_ftp_list[0])
    total_link_2 = http + str(fastq_ftp_list[1])
    fastq_1_download = requests.get(total_link_1, allow_redirects=True)
    fastq_file_name_1 = folders[2] + str(download_lists[1][i]) + '_1' + gz
    # Context managers make sure every file is flushed and closed.
    with open(fastq_file_name_1, 'wb') as fastq_file_1:
        fastq_file_1.write(fastq_1_download.content)
    fastq_2_download = requests.get(total_link_2, allow_redirects=True)
    fastq_file_name_2 = folders[2] + str(download_lists[1][i]) + '_2' + gz
    with open(fastq_file_name_2, 'wb') as fastq_file_2:
        fastq_file_2.write(fastq_2_download.content)
Example #33
0
from utilities import create_dir
from utilities import download_master_file
from utilities import read_master_file
import pandas as pd
import requests

http = 'http://'
gz = '.fastq.gz'
xml = '.xml'

folders = create_dir('PRJNA389280.txt', 'IBD')
download_master_file(
    'https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA389280&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt',
    folders[0])
download_lists = read_master_file(folders[0], '\t')

#download fastq and metadata
for i in range(len(download_lists[1])):

    fastq_ftp_str = ''
    fastq_ftp_list = []
    fastq_ftp_str = download_lists[0][i]
    fastq_ftp_list = fastq_ftp_str.split(';')
    total_link = http + str(fastq_ftp_list[0])
    fastq_download = requests.get(total_link, allow_redirects=True)
    fastq_file_name = folders[2] + str(download_lists[1][i]) + gz
    open(fastq_file_name, 'wb').write(fastq_download.content)

    xml_location = 'https://www.ebi.ac.uk/ena/data/view/{}&display=xml'.format(
        download_lists[2][i])
    xml_download = requests.get(xml_location, allow_redirects=True)