Example #1
def loadState(file, name='data', raiseException=True):
    import os
    import compress_pickle
    pklfile = f'save_data/{file}/{name}.pkl'
    try:
        if os.path.exists(pklfile):
            # with open(pklfile, 'rb') as f:
            res = compress_pickle.load(pklfile)
            # f.close()
            saveState(res, file, name)
            os.remove(pklfile)
            return res
        # if(name=='data'):
        # from metric.CMbasedMetric import CMbasedMetric
        # from metric.event_confusion_matrix import event_confusion_matrix
        #     [run_info,datasetdscr,evalres]=compress_pickle.load(pklfile+'.lz4')
        #     for i in evalres:
        #         data=evalres[i]['test']
        #         Sdata=data.Sdata
        #         import combiner.SimpleCombiner
        #         com=combiner.SimpleCombiner.EmptyCombiner2()
        #         evalres[i]['test'].Sdata.pred_events =com.combine(Sdata.s_event_list,Sdata.set_window,data.predicted)
        #         evalres[i]['test'].event_cm     =event_confusion_matrix(Sdata.a_events,Sdata.pred_events,datasetdscr.activities)
        #         evalres[i]['test'].quality      =CMbasedMetric(data.event_cm,'macro',None)
        #     return [run_info,datasetdscr,evalres]
        return compress_pickle.load(pklfile + '.lz4')
    except Exception:
        if raiseException:
            raise
        return None
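loadState above migrates a plain '.pkl' file to the compressed '.lz4' format through a saveState helper that is not shown. A minimal sketch of what that counterpart might look like, assuming it simply writes the LZ4-compressed pickle next to the original path (the directory layout is inferred, not taken from the source):

def saveState(data, file, name='data'):
    # Hypothetical counterpart to loadState: write the object to
    # 'save_data/<file>/<name>.pkl.lz4' so later loads hit the .lz4 branch.
    import os
    import compress_pickle
    outdir = f'save_data/{file}'
    os.makedirs(outdir, exist_ok=True)
    # compress_pickle infers LZ4 compression from the '.lz4' extension
    compress_pickle.dump(data, os.path.join(outdir, f'{name}.pkl.lz4'))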
Example #2
def remove_training_data():
    from compress_pickle import dump, load

    training_data_path = "/home/pepamengual/UEP/trained_model/UEP_trained_model_4"
    skempi_data_path = "/home/pepamengual/UEP/trained_model/substracted_4"
    substracted_model = {}
    training_data = load(training_data_path,
                         compression="lzma",
                         set_default_extension=False)
    skempi_data = load(skempi_data_path,
                       compression="lzma",
                       set_default_extension=False)

    for environment, amino_acid_dict in training_data.items():
        for amino_acid, counts in amino_acid_dict.items():
            if environment in skempi_data and amino_acid in skempi_data[
                    environment]:
                substract = counts - skempi_data[environment][amino_acid]
                substracted_model.setdefault(environment, {}).setdefault(
                    amino_acid, substract)
            else:
                substracted_model.setdefault(environment, {}).setdefault(
                    amino_acid, counts)

    dump(substracted_model, "substracted_def_4", compression="lzma")
Example #3
def parallelPlotter(i, xpNum, digs, fName, PT_IMG):
    xpNumCS = str(i + 1).zfill(digs)
    print('* Exporting {}/{}'.format(xpNumCS, str(xpNum).zfill(digs)), end='\r')
    # Name formatting -> tuple--------------------------------------------
    repsRatios = np.load(fName)
    fList = fName.split('/')[-1].split('-')[0].split('_')[1:]
    fList.append(fName.split('/')[-1].split('-')[1].split('_')[1])
    fKeys = tuple(list(map(int, fList)))
    # Select cmap --------------------------------------------------------
    (scalers, HD_DEP, IND_RAN, palette) = aux.selectDepVars('TTI', AOI)
    cmap = palette.reversed()
    # load TTI and TTO ---------------------------------------------------
    ttiR = pkl.load(PT_MTR+'{}_TTI_{}_mlr.bz'.format(AOI, QNT))
    tti = ttiR[fKeys][int(thi*100)]
    ttoR = pkl.load(PT_MTR+'{}_TTO_{}_mlr.bz'.format(AOI, QNT))
    tto = ttoR[fKeys][int(tho*100)]
    # Plotting-------------------------------------------------------------
    (fig, ax) = plt.subplots(nrows=1, ncols=1)
    ax.imshow(repsRatios, cmap=cmap)
    # add TTI-------------------------------------------------------------
    [plt.axvline(i, color='black', alpha=.65, lw=0.175, ls='-.') for i in tti]
    # add TTO-------------------------------------------------------------
    [plt.axvline(j, color='black', alpha=.75, lw=0.2, ls='dotted') for j in tto]
    # Save the figure------------------------------------------------------
    outName = fName.split('/')[-1].split('.')[0][:-4]
    plt.xlim(X_RAN)
    ax.axes.xaxis.set_ticklabels([])
    ax.axes.yaxis.set_ticklabels([])
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    ax.xaxis.set_tick_params(size=0)
    ax.yaxis.set_tick_params(size=0)
    plt.savefig(PT_IMG+outName + '.png', bbox_inches='tight', pad_inches=0.01, dpi=500)
    plt.close("all")
Example #4
def test_dump_load(dump_load):
    (
        message,
        path,
        compression,
        set_default_extension,
        optimize,
        expected_file,
        expected_fail,
    ) = dump_load
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        if expected_fail is None:
            dump(
                message,
                path,
                compression,
                set_default_extension=set_default_extension,
                optimize=optimize,
            )
            loaded_message = load(
                path, compression, set_default_extension=set_default_extension
            )
            assert loaded_message == message
        else:
            with pytest.raises(expected_fail):
                dump(
                    message,
                    path,
                    compression,
                    set_default_extension=set_default_extension,
                    optimize=optimize,
                )
            with pytest.raises(expected_fail):
                load(path, compression, set_default_extension=set_default_extension)
Example #5
    def _get_hard_and_rel_sub_dicts(self, sample_names):
        if os.path.isfile(
                os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz')):
            if os.path.isfile(
                    os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz')):
                return compress_pickle.load(
                    os.path.join(
                        self.cache_dir,
                        'hard_sub_sample_dict.p.bz')), compress_pickle.load(
                            os.path.join(self.cache_dir,
                                         'rel_sub_sample_dict.p.bz'))

        hard_sub_sample_dict = {}
        rel_sub_sample_dict = {}
        count = 0
        tot_samples = len(sample_names)
        for sample_name in sample_names:
            count += 1
            sys.stdout.write(f'\r{sample_name}: {count}/{tot_samples}')
            abund_list = self.absolute_consolidated_abundance_dict[sample_name]
            if sum(abund_list) < 10000:
                continue

            # Make a redundant list of the seqs
            non_z = []
            for i, abund in enumerate(abund_list):
                if abund > 0:
                    non_z.append(i)

            redundant_list = []
            # prob_list = []
            tot = sum(abund_list)
            for i in non_z:
                seq = self.ordered_seq_names[i]
                abund = abund_list[i]
                # prob = abund/tot
                redundant_list.extend([seq for _ in range(abund)])
                # prob_list.extend([prob for _ in range(abund)])

            hard_sub_sample_list = np.random.choice(redundant_list,
                                                    10000,
                                                    replace=False)
            hard_abunds_dict = dict(Counter(hard_sub_sample_list))
            hard_sub_sample_dict[sample_name] = hard_abunds_dict

            # For soft
            norm_abund_dict = {
                self.ordered_seq_names[i]: int((abund_list[i] / tot) * 100)
                for i in non_z if int((abund_list[i] / tot) * 10000) > 0
            }
            rel_sub_sample_dict[sample_name] = norm_abund_dict

        compress_pickle.dump(
            hard_sub_sample_dict,
            os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz'))
        compress_pickle.dump(
            rel_sub_sample_dict,
            os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz'))
        return hard_sub_sample_dict, rel_sub_sample_dict
Example #6
def landSelector(USR, LND):
    pth = aux.selectGeoPath(USR)
    if LND == '02':
        pts = pkl.load(path.join(pth, 'CLS_02.bz'))
    elif LND == '10':
        pts = pkl.load(path.join(pth, 'CLS_10.bz'))
    elif LND == '01':
        pts = pkl.load(path.join(pth, 'CLS_01.bz'))
    else:
        raise ValueError('Unknown land id: {}'.format(LND))
    return pts['groups']
Example #7
    def _make_abundance_df(self):
        # Dict that we will populate and then use to make the abundance_df
        df_dict = {}
        print('making abundance df')
        for readset in self.readsets:
            sys.stdout.write(f'\r{readset}')
            sample_qc_dir = os.path.join(self.parent.qc_dir, readset)
            # make a seq_name to abundance dict from the fasta and .names pair
            seq_abund_dict = self._make_abund_dict_from_names_path(
                readset=readset)
            # For the all_taxa, we will go sequence by sequence through the fasta file
            fasta_path = os.path.join(
                sample_qc_dir,
                'stability.trim.contigs.good.unique.abund.pcr.unique.fasta')
            fasta_file_as_list = EighteenSBase.decompress_read_compress(
                fasta_path)
            fasta_names = [
                line.split('\t')[0][1:] for line in fasta_file_as_list
                if line[0] == '>'
            ]

            # then load the three dictionaries
            sample_annotation_dict = compress_pickle.load(
                os.path.join(sample_qc_dir, 'sample_annotation_dict.p.bz'))
            coral_annotation_dict = compress_pickle.load(
                os.path.join(sample_qc_dir, 'coral_annotation_dict.p.bz'))
            sample_count_dict = {
                cat: 0
                for cat in self.parent.plotting_categories
            }

            if self.parent.plot_type == 'all_taxa':
                self._log_abundances_all_taxa(sample_annotation_dict,
                                              sample_count_dict,
                                              seq_abund_dict,
                                              coral_annotation_dict,
                                              fasta_names)
            elif self.parent.plot_type == 'all_coral_genus':
                self._log_abundances_all_coral_genus(sample_annotation_dict,
                                                     sample_count_dict,
                                                     seq_abund_dict,
                                                     coral_annotation_dict)
            else:
                raise NotImplementedError

            # Now add the collected abundances to the sample df_dict
            # Making them relative by dividing by the total of the sample_count_dd
            df_dict[readset] = [
                sample_count_dict[cat_key] / sum(sample_count_dict.values())
                for cat_key in self.parent.plotting_categories
            ]

        # Now create the df from the df_dict
        return pd.DataFrame.from_dict(data=df_dict,
                                      orient='index',
                                      columns=self.parent.plotting_categories)
Example #8
    def __init__(self, parent, readset):
        # Here we will not assign all of the information we want from the provenance table
        # to a variable. We will only assign those things that we need to make our host-related
        # columns from. I think this is just the provenance_annotation.
        self.parent = parent
        self.readset = readset
        self.sample_id = self.parent.fastq_info_df.at[readset, 'sample-id']
        self.use = True
        # we will add a column which is do_not_use_reason
        # This will be a string value of either:
        # "tax_annotation_mismatch" -- > if the genetic tax annotation does not match the sample provenance annotation
        # "putative_intra_genus_contamination"
        # "inter_genus_contamination"
        # "low_host_sequence_abundance"
        # "not_of_target_genus"
        # "different_primary_sequence"
        self.do_not_use_reason = []
        self.sample_qc_dir = os.path.join(self.parent.qc_dir, readset)
        self.coral_annotation_dict = compress_pickle.load(
            os.path.join(self.sample_qc_dir, 'coral_annotation_dict.p.bz'))
        self.consolidated_host_seqs_abund_dict = compress_pickle.load(
            os.path.join(self.sample_qc_dir,
                         'consolidated_host_seqs_abund_dict.p.bz'))
        self.rel_all_seq_abundance_dict = compress_pickle.load(
            os.path.join(self.sample_qc_dir,
                         'rel_all_seq_abundance_dict.p.bz'))
        self.coral_tax_rel_count_dd = self._make_coral_tax_rel_count_dd()
        self.sorted_coral_tax_dict_keys = sorted(
            self.coral_tax_rel_count_dd,
            key=self.coral_tax_rel_count_dd.get,
            reverse=True)
        self.genus_18S_taxonomic_annotation = self.sorted_coral_tax_dict_keys[
            0]
        self.provenance_annotation = self.parent.sample_provenance_df.at[
            self.sample_id,
            'Sample Material label, organismal system level, taxonomic, nominal']

        # The remainder of the variables that we need to populate
        self.is_provenance_tax_annotation_correct = None
        self.inter_genus_contamination_rel_abund = None
        self.is_inter_genus_contamination = None
        self.primary_sequence = None
        self.host_rel_abund = None
        self.putative_intra_genus_contamination_ratio = None
        self.is_putative_intra_genus_contamination = None
        self.is_representative_for_sample = None
        self.post_qc_seq_depth = None

        # Variables that are only associated with processing a Heliopora sample
        self.sample_annotation_dict = None
        self.fasta_dict = None
        self.all_tax_count_dd = None
Example #9
    def _init_color_dict(self):
        if self.plot_type == 'all_taxa':
            col_dict = {
                'Porites': '#FFFF00',
                'Pocillopora': '#87CEFA',
                'Millepora': '#FF6347',
                'other_coral': '#C0C0C0',
                'Symbiodiniaceae': '#00FF00',
                'other_taxa': '#696969',
                'not_annotated': '#282828'
            }
            return [
                'Pocillopora', 'Porites', 'Millepora', 'other_coral',
                'Symbiodiniaceae', 'other_taxa', 'not_annotated'
            ], col_dict
        elif self.plot_type == 'all_coral_genus':
            col_dict = {
                'Porites': '#FFFF00',
                'Pocillopora': '#87CEFA',
                'Millepora': '#FF6347',
                'other_coral': '#C0C0C0'
            }
            return ['Pocillopora', 'Porites', 'Millepora',
                    'other_coral'], col_dict
        elif self.plot_type in ['all_coral_sequence', 'minor_coral_sequence']:
            col_dict = compress_pickle.load(
                os.path.join(self.cache_dir,
                             'all_coral_sequence_color_dict.p.bz'))
            return None, col_dict
        else:
            raise NotImplementedError()
Example #10
def unpickle_file(filename):
    """Returns the result of unpickling the file content."""
    return compress_pickle.load(filename,
                                compression="lzma",
                                set_default_extension=False)
    # Unreachable legacy path kept for reference (plain, uncompressed pickle):
    # with open(filename, 'rb') as f:
    #     return pickle.load(f)
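The example shows only the reader; a minimal sketch of a matching writer, assuming the same LZMA settings (the name pickle_file is hypothetical):

def pickle_file(obj, filename):
    # Hypothetical counterpart to unpickle_file: same compression settings,
    # and the filename is used as-is (set_default_extension=False).
    import compress_pickle
    compress_pickle.dump(obj, filename,
                         compression="lzma",
                         set_default_extension=False)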
Example #11
    def __init__(self, kernel, machine, cores=1):
        """Initialize cache simulation based predictor from kernel and machine object."""
        CachePredictor.__init__(self, kernel, machine, cores)
        if isinstance(kernel, KernelCode):
            # Make use of caching for symbolic LC representation:
            file_name = 'CSIM_analysis.pickle.lzma'
            file_path = kernel.get_intermediate_location(
                file_name,
                machine_and_compiler_dependent=False,
                other_dependencies=[str(cores)] +
                [str(t) for t in self.kernel.constants.items()])
            lock_mode, lock_fp = kernel.lock_intermediate(file_path)
            if lock_mode == fcntl.LOCK_SH:
                # use cache
                cache = compress_pickle.load(file_path)
                lock_fp.close()  # release lock
                self.first_dim_factor = cache['first_dim_factor']
                self.stats = cache['stats']
            else:  # lock_mode == fcntl.LOCK_EX
                # needs update
                self.simulate()
                compress_pickle.dump(
                    {
                        'first_dim_factor': self.first_dim_factor,
                        'stats': self.stats
                    }, file_path)
                lock_fp.close()  # release lock
        else:
            # No caching support without filename for kernel code
            self.simulate()
Example #12
    def __init__(self, kernel, machine, cores=1, symbolic=False):
        """Initialize layer condition based predictor from kernel and machine object."""
        CachePredictor.__init__(self, kernel, machine, cores=cores)
        if isinstance(kernel, KernelCode):
            # Make use of caching for symbolic LC representation:
            file_name = 'LC_analysis.pickle.lzma'
            file_path = kernel.get_intermediate_location(
                file_name,
                machine_and_compiler_dependent=False,
                other_dependencies=[str(cores)])
            lock_mode, lock_fp = kernel.lock_intermediate(file_path)
            if lock_mode == fcntl.LOCK_SH:
                # use cache
                self.results = compress_pickle.load(file_path)
                lock_fp.close()  # release lock
            else:  # lock_mode == fcntl.LOCK_EX
                # needs update
                self.build_symbolic_LCs()
                compress_pickle.dump(self.results, file_path)
                lock_fp.close()  # release lock
        else:
            # No caching support without filename for kernel code
            self.build_symbolic_LCs()

        if not symbolic:
            self.desymbolize()
Example #13
def exportPstTracesParallel(
        exIx, expsNum,
        STABLE_T, THS, QNT, STYLE, PT_IMG, 
        border=True, borderColor='#322E2D', borderWidth=1, 
        labelPos=(.7, .9), xpsNum=0, digs=3, 
        autoAspect=False, popScaler=1,
        wopPrint=True, cptPrint=True, poePrint=True, mnfPrint=True, 
        ticksHide=True, transparent=True, sampRate=1, labelspacing=.1
    ):
    (ix, repFile, tti, tto, wop, mnf, _, poe, cpt) = exIx
    repDta = pkl.load(repFile)
    # Print to terminal -------------------------------------------------------
    padi = str(ix+1).zfill(digs)
    fmtStr = '{}+ File: {}/{}'
    print(fmtStr.format(monet.CBBL, padi, expsNum, monet.CEND), end='\r')
    # Traces ------------------------------------------------------------------
    pop = repDta['landscapes'][0][STABLE_T][-1]
    # STYLE['yRange'] = (0,  pop*popScaler)
    monet.exportTracesPlot(
        repDta, repFile.split('/')[-1][:-6]+str(QNT), STYLE, PT_IMG,
        vLines=[tti, tto, 0], hLines=[mnf*pop], labelPos=labelPos, 
        border=border, borderColor=borderColor, borderWidth=borderWidth,
        autoAspect=autoAspect, popScaler=popScaler,
        wop=wop, wopPrint=wopPrint, 
        cpt=cpt, cptPrint=cptPrint,
        poe=poe, poePrint=poePrint,
        mnf=mnf, mnfPrint=mnfPrint,
        ticksHide=ticksHide, transparent=transparent,
        sampRate=sampRate, labelspacing=labelspacing
    )
    return None
Example #14
def test_dump_load_on_filestreams(simple_dump_and_remove):
    path, compression, message, optimize = simple_dump_and_remove
    read_mode = "rb"  # get_compression_read_mode(compression)
    write_mode = "wb"  # get_compression_write_mode(compression)
    with open(path, write_mode) as f:
        dump(message, f, compression=compression, optimize=optimize)
    with open(path, read_mode) as f:
        raw_content = f.read()
        f.seek(0)
        loaded_message = load(f, compression=compression)
    assert loaded_message == message
    os.remove(path)
    dump(
        message,
        path,
        compression=compression,
        set_default_extension=False,
        optimize=optimize,
    )
    with open(path, read_mode) as f:
        benchmark = f.read()
    # zipfile compression stores the data in a zip archive. The archive then
    # contains a file with the data. Said file's mtime will always be
    # different between the two dump calls, so we skip the following assertion
    if compression != "zipfile":
        assert raw_content == benchmark
Example #15
    def _get_snp_classifications(self, genus):
        if genus == 'Pocillopora':
            # First check to see if the cached version exists
            snp_cache_dir = os.path.join(self.input_dir_18s,
                                         'snp_classifications',
                                         'poc_snp_class_df.p.bz')
        elif genus == 'Porites':
            snp_cache_dir = os.path.join(self.input_dir_18s,
                                         'snp_classifications',
                                         'por_snp_class_df.p.bz')
        else:
            raise ValueError('Unknown genus: {}'.format(genus))

        if os.path.exists(snp_cache_dir):
            return compress_pickle.load(snp_cache_dir)
        else:
            # Need to create it from scratch
            if genus == 'Pocillopora':
                raw_snp_class_path = os.path.join(
                    self.input_dir_18s, 'snp_classifications',
                    f'POC_SNP_classifications.csv')
            elif genus == 'Porites':
                raw_snp_class_path = os.path.join(
                    self.input_dir_18s, 'snp_classifications',
                    f'POR_SNP_classifications.csv')

            snp_class_df = pd.read_csv(raw_snp_class_path, index_col=0)
            snp_class_df.index = self._convert_index_to_sample_ids(
                snp_class_df.index)
            snp_class_df.dropna(inplace=True)
            snp_class_df.columns = ['label']
            compress_pickle.dump(snp_class_df, snp_cache_dir)
            return snp_class_df
Example #16
    def _pop_for_heliopora_sample(self):
        self.use = False
        self.do_not_use_reason.append("not_of_target_genus")
        self.sample_annotation_dict = compress_pickle.load(
            os.path.join(self.sample_qc_dir, 'sample_annotation_dict.p.bz'))
        self.fasta_dict = self._make_fasta_dict()
        self.all_tax_count_dd = self._make_all_tax_count_dd()
        self.genus_18S_taxonomic_annotation = sorted(
            self.all_tax_count_dd, key=self.all_tax_count_dd.get,
            reverse=True)[0]
        self._set_is_provenance_tax_annotation_correct()

        # set inter coral contamination fields to nan
        self.inter_genus_contamination_rel_abund = np.nan
        self.is_inter_genus_contamination = np.nan
        # primary seq should be the most abundant seq in the sample
        self.primary_sequence = self.fasta_dict[sorted(
            self.rel_all_seq_abundance_dict,
            key=self.rel_all_seq_abundance_dict.get,
            reverse=True)[0]]
        # But we set is different to nan
        self.is_different_primary_sequence = np.nan
        # host rel abund will be the abund of heliopora
        self._set_host_rel_abund_heliopora()
        # Set the intragenus to nan
        self.putative_intra_genus_contamination_ratio = np.nan
        self.is_putative_intra_genus_contamination = np.nan
        self._set_post_qc_seq_depth()
        self._set_is_representative_for_sample()
        self._populate_coral_meta_info_table_dict()
Example #17
def exportPreTracesParallel(exIx,
                            STYLE,
                            PT_IMG,
                            border=True,
                            borderColor='#322E2D',
                            borderWidth=1,
                            autoAspect=False,
                            xpNum=0,
                            digs=3,
                            vLines=[0, 0],
                            hLines=[0],
                            popScaler=1,
                            sampRate=1):
    monet.printProgress(exIx[0], xpNum, digs)
    repFilePath = exIx[1][1]
    repDta = pkl.load(repFilePath)
    name = path.splitext(repFilePath.split('/')[-1])[0][:-4]
    monet.exportTracesPlot(repDta,
                           name,
                           STYLE,
                           PT_IMG,
                           wopPrint=False,
                           autoAspect=autoAspect,
                           border=border,
                           borderColor=borderColor,
                           borderWidth=borderWidth,
                           sampRate=sampRate)
    return None
Example #18
    def load_rearrange_data_from_path(
        cls,
        stage: str,
        base_dir: Optional[str] = None,
    ) -> Dict[str, List[Dict[str, Any]]]:
        stage = stage.lower()

        if stage == "valid":
            stage = "val"

        data_path = os.path.abspath(os.path.join(base_dir, f"{stage}.pkl.gz"))
        if not os.path.exists(data_path):
            raise RuntimeError(f"No data at path {data_path}")

        data = compress_pickle.load(path=data_path)
        for scene in data:
            for ind, task_spec_dict in enumerate(data[scene]):
                task_spec_dict["scene"] = scene

                if "index" not in task_spec_dict:
                    task_spec_dict["index"] = ind

                if "stage" not in task_spec_dict:
                    task_spec_dict["stage"] = stage
        return data
Example #19
def main():
    skempi_path = "skempi/skempi_v2.csv"

    model_trained = "trained_model/UEP_trained_model_4"
    training_data = load(model_trained,
                         compression="lzma",
                         set_default_extension=False)
    skempi_processed_data_single, skempi_processed_data_multiple, skempi_processed_data_single_no_renamed = read_skempi.process_skempi_data(
        skempi_path)

    data = {}
    for entry, value in skempi_processed_data_single.items():
        pdb = entry.split("_")[0]
        mutation_info = entry.split("_")[-1]
        chain = mutation_info[1]
        mutation_cleaned = "{}{}".format(mutation_info[0], mutation_info[2:])
        data.setdefault(pdb, []).append([chain, mutation_cleaned])

    for pdb, info_list in data.items():
        with open("skempi/mcsm/mutation_lists/{}.txt".format(pdb), "w") as f:
            for mutation in info_list:
                chain = mutation[0]
                mutation_cleaned = mutation[1]
                to_write = "{} {}".format(chain, mutation_cleaned)
                f.write(to_write + "\n")
Example #20
def exportPreTracesPlotWrapper(expIx,
                               fLists,
                               STYLE,
                               PT_IMG,
                               border=True,
                               borderColor='#322E2D',
                               borderWidth=1,
                               autoAspect=False,
                               xpNum=0,
                               digs=3,
                               vLines=[0, 0],
                               hLines=[0],
                               popScaler=1,
                               transparent=False):
    ter.printProgress(expIx + 1, xpNum, digs)
    (_, repDta) = [pkl.load(file) for file in (fLists[expIx])]
    name = path.splitext(fLists[expIx][0].split('/')[-1])[0][:-4]
    # Export plots --------------------------------------------------------
    exportTracesPlot(repDta,
                     name,
                     STYLE,
                     PT_IMG,
                     wopPrint=False,
                     autoAspect=autoAspect,
                     border=border,
                     borderColor=borderColor,
                     borderWidth=borderWidth,
                     transparent=transparent,
                     vLines=vLines,
                     hLines=hLines)
    return None
Example #21
    def __init__(self):
        super().__init__()
        self._add_additional_info_to_info_df()
        self.absolute_consolidated_abundance_dict = compress_pickle.load(
            os.path.join(self.cache_dir,
                         'consolidated_df_dict_output_tables.p.bz'))
        self.fig, self.ax = plt.subplots(6, 1)
Example #22
def get_scene_to_obj_name_to_seen_positions():
    scene_to_task_spec_dicts = compress_pickle.load(
        os.path.join(STARTER_DATA_DIR, f"train.pkl.gz"))
    assert len(scene_to_task_spec_dicts) == 80 and all(
        len(v) == 50 for v in scene_to_task_spec_dicts.values())

    scene_to_obj_name_to_positions = {}
    for scene in tqdm.tqdm(scene_to_task_spec_dicts):
        obj_name_to_positions = defaultdict(lambda: [])
        for task_spec_dict in scene_to_task_spec_dicts[scene]:
            for od in task_spec_dict["openable_data"]:
                obj_name_to_positions[od["name"]].extend(
                    (od["start_openness"], od["target_openness"]))

            for sp, tp in zip(task_spec_dict["starting_poses"],
                              task_spec_dict["target_poses"]):
                assert sp["name"] == tp["name"]

                position_dist = IThorEnvironment.position_dist(
                    sp["position"], tp["position"])
                rotation_dist = IThorEnvironment.angle_between_rotations(
                    sp["rotation"], tp["rotation"])
                if position_dist >= 1e-2 or rotation_dist >= 5:
                    obj_name_to_positions[sp["name"]].append(
                        [sp["position"][k] for k in ["x", "y", "z"]])
                    obj_name_to_positions[sp["name"]].append(
                        [tp["position"][k] for k in ["x", "y", "z"]])
        scene_to_obj_name_to_positions[scene] = {
            k: np.array(v)
            for k, v in obj_name_to_positions.items()
        }

    return scene_to_obj_name_to_positions
Example #23
def parse_information_files(list_of_enzyme_commission_numbers,
                            list_of_databases, cpus):
    output_path = "data/parsed_raw/parsed_dictionary_of_raw_data.gzip"
    if not os.path.exists(output_path):
        print("Creating information compressed file. Saved at {}".format(
            output_path))
        work_list = []
        for enzyme in list_of_enzyme_commission_numbers:
            for database in list_of_databases:
                file_path = "data/raw/{}_{}_raw.txt".format(enzyme, database)
                work_list.append((file_path, ))
        result_list = run_multiprocessing.run_mp(work_list, cpus, process_file)
        processed_results = split_results(result_list)
        dump(processed_results,
             output_path,
             compression="gzip",
             set_default_extension=False)
    else:
        print("Information file have been found at {}. Loading it.".format(
            output_path))
        processed_results = load(output_path,
                                 compression="gzip",
                                 set_default_extension=False)
        print("File loaded")
    return processed_results
Example #24
    def load_pickle(self, filename):
        '''
        Reloads compressed binary file saved by save_pickle.
        '''
        try:
            infile = open(filename, 'rb')
        except IOError:
            print("File not found: \"{}\"".format(filename))
            return
        with infile:
            indata = load(infile, compression="gzip")

        self.net_thickness = indata[0]
        self.net_width = indata[1]
        self.net_length = indata[2]
        self.slots_in = indata[3]
        self.slots_out = indata[4]
        self.angle_in = indata[5]
        self.angle_out = indata[6]
        self.spi_in = indata[7]
        self.tol = indata[8]
        self.knots = indata[9]
        self.probe_pts = indata[10]
        self.probe_samples = indata[11]

        self.spi_in = (self.slots_in /
                       (self.net_width * sin((pi / 2.) + self.angle_in)))
        self.spi_out = (self.slots_out /
                        (self.net_width * sin((pi / 2.) + self.angle_out)))
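The docstring above refers to a save_pickle method that is not included here. A minimal sketch of it, under the assumption that it mirrors load_pickle and writes the same twelve fields in the same order through a gzip-compressed stream:

    def save_pickle(self, filename):
        # Hypothetical counterpart to load_pickle: dump the fields in the
        # order that load_pickle unpacks them by index.
        outdata = [self.net_thickness, self.net_width, self.net_length,
                   self.slots_in, self.slots_out, self.angle_in,
                   self.angle_out, self.spi_in, self.tol, self.knots,
                   self.probe_pts, self.probe_samples]
        with open(filename, 'wb') as outfile:
            dump(outdata, outfile, compression="gzip")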
Example #25
File: app.py Project: mcatalano26/cmps-cap
def showPosts():
    comments = []

    reddit_link = request.form.get('reddit_link')
    submission = reddit.submission(url=reddit_link)
    title = submission.title
    selftext = submission.selftext
    submission.comments.replace_more(limit=0)

    swearwords_df = pd.read_csv('files/edited-swear-words.csv')
    swearwords = swearwords_df.swear.tolist()
    features = [
        'profanity', 'length', 'adjWordScore', 'NER_count', 'NER_match',
        'WordScore', 'WholeScore', 'contains_url', 'no_url_WordScore',
        'no_url_WholeScore', 'WordScoreNoStop', 'WholeScoreNoStop',
        'no_url_or_stops_WholeScore', 'no_url_or_stops_WordScore'
    ]
    our_model = load("updated_model.pkl",
                     compression="lzma",
                     set_default_extension=False)
    punctuation_lst = [
        ',', '.', '!', '?', '<', '>', '/', ':', ';', '\'', '\"', '[', '{', ']',
        '}', '|', '\\', '`', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(',
        ')', '-', '_', '=', '+'
    ]

    article_url = submission.url
    cleaned_article_text = ab.clean_article(article_url)
    no_url_article_text = ab.remove_urls(cleaned_article_text)
    no_stop_article_text = ab.remove_stopwords(cleaned_article_text)
    no_stop_or_url_article_text = ab.remove_urls(no_stop_article_text)

    # rank by upvotes
    for comment in submission.comments:
        is_good_comment = ab.judgeComment(
            comment, reddit_link, swearwords, features, our_model,
            cleaned_article_text, no_url_article_text, no_stop_article_text,
            no_stop_or_url_article_text, punctuation_lst)
        confidence = is_good_comment[2]
        if (is_good_comment[0]):
            comment.body = vc.visualize(comment.body)
            comment.body = vc.good_comment(comment.body, confidence)
            comments.append(comment.body.split())
        else:
            comment.body = vc.visualize(comment.body)
            comment.body = vc.bad_comment(comment.body, confidence)
            comments.append(comment.body.split())

    return render_template(
        'post.html',
        comments=comments,
        title=title,
        selftext=selftext,
        reddit_url=reddit_link,
        cleaned_article_text=cleaned_article_text,
        no_url_article_text=no_url_article_text,
        no_stop_article_text=no_stop_article_text,
        no_stop_or_url_article_text=no_stop_or_url_article_text,
        exp=None)
Example #26
File: replay.py Project: zsyzgu/TouchType
    def init(self):
        self.capture = cv2.VideoCapture('data/' + self.file_name + '.avi')
        self.screenshots = []
        with open('data/' + self.file_name + '.timestamp', 'rb') as f:
            self.timestamps = pickle.load(f)
        self.frames = compress_pickle.load('data/' + self.file_name + '.gz')
        self.frame_id = 0
        self.auto_play = False
Example #27
def cache_read(file_name, cache_prefix=True):
    # file_name = cn_(file_name) if cache_prefix else file_name
    if os.path.exists(file_name):
        with open(file_name, 'rb') as f:
            return compress_pickle.load(f, compression="lzma")
            # return pickle.load(f)
    else:
        return None
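Only the read side of the cache is shown; a minimal sketch of a write counterpart (the name cache_write is hypothetical), using the same LZMA compression over an open file object:

def cache_write(obj, file_name, cache_prefix=True):
    # Hypothetical counterpart to cache_read: pickle 'obj' to 'file_name'
    # with the same LZMA compression.
    with open(file_name, 'wb') as f:
        compress_pickle.dump(obj, f, compression="lzma")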
Example #28
def landSelector(land, PT_ROT):
    if land == 'PAN':
        lnd = (list(range(0, 62)), )
        return lnd
    else:
        pth = PT_ROT.replace('/'+land+'/', '')
        lnd = pkl.load(path.join(pth, 'GEO', 'clusters.bz'))
        return lnd
Example #29
    def read(self, fsnum: int = 0, fsoffset: int = 0,
             random_addition: bool = True, random_read: bool = True):
        """
        Load files from the given directory 'self.datadir'.

        - If 'fsnum' is 0, all files will be loaded.
        - If 'fsoffset' is 0, all files will be loaded.

        Parameters
        ----------
        fsnum : int, optional
            Number of files to load. The default is 0.
        fsoffset : int, optional
            Offset from the first file to start counting of files.
            The default is 0.
        random_addition : bool, optional
            Read additional random files from 'self.datadir' when 'fsnum'
            exceeds the number of files remaining after the offset.
            The default is True.
        random_read : bool, optional
            Read random files, not from the ordered list of files.
            The default is True.

        Returns
        -------
        None.

        """
        if fsnum != 0:
            self.fsnum = fsnum
        if fsoffset != 0:
            self.fsoffset = fsoffset

        datapaths = list_files(
            self.datadir, self.vendor, self.fsoffset, self.fsnum)

        if random_read or (len(datapaths) < self.fsnum and random_addition):
            _datapaths = list_files(self.datadir, self.vendor, 0, 0)
            indxs = np.arange(len(_datapaths))
            rnd_indxs = rsample(
                indxs[indxs != self.fsoffset].tolist(), self.fsnum - 1)
            datapaths = [_datapaths[i] for i in rnd_indxs + [self.fsoffset]]

        # Check if file was processed
        files_processed = [k for k, v in self.rawdata.items() if v is not None]
        files_to_remove = [k for k in self.rawdata if k not in datapaths]

        for path in files_to_remove:
            del self.rawdata[path]

        for path in datapaths:
            if path not in files_processed:
                self.rawdata[path] = None

        if self.memo_file:
            self.rawdata.update(load(self.memo_file, compression='lzma',
                                     set_default_extension=False))

        self.filesnum = len(self.rawdata) if not fsnum else fsnum
Example #30
    def _populate_master_seq_info_dict(self):
        if os.path.isfile(
                os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz')):
            self.master_seq_info_dict = compress_pickle.load(
                os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz'))
        else:
            print(
                'Collecting sequence information (original seqs): first pass')
            for readset in self.coral_readsets:
                sys.stdout.write(f'\r{readset}')
                # read in the fasta file
                fasta_file = self._read_in_fasta_file(readset)

                # read in the name file and make an abundance dictionary
                name_rel_abund_dict = self._make_rel_abund_dict_from_names_path(
                    readset)

                # read in the sample taxonomy dictionary
                sample_annotation_dict = compress_pickle.load(
                    os.path.join(self.qc_dir, readset,
                                 'sample_annotation_dict.p.bz'))

                # for each sequence in the fasta file
                # if not already in the dict, init with the rel abund and tax info
                # else simply add to the cumulative abund
                for i in range(0, len(fasta_file), 2):
                    seq_name = fasta_file[i].split('\t')[0][1:]
                    seq = fasta_file[i + 1]
                    try:
                        self.master_seq_info_dict[seq][
                            'cummulative_abund'] += name_rel_abund_dict[
                                seq_name]
                    except KeyError:
                        try:
                            tax_tup = sample_annotation_dict[seq_name]
                        except KeyError:
                            tax_tup = ('not_annotated', 'not_annotated',
                                       'not_annotated')
                        self.master_seq_info_dict[seq] = {
                            'cummulative_abund': name_rel_abund_dict[seq_name],
                            'tax_annotation': tax_tup
                        }
            compress_pickle.dump(
                self.master_seq_info_dict,
                os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz'))