Example No. 1
    def __init__(self, config):
        DataContainer.__init__(self, config)

        try:
            self.timepoint = int(config['timepoint'])
        except KeyError as key:
            raise EnrichError("Missing required config value '%s'" % key, 
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value %s" % value, self.name)

        if 'align variants' in config:
            if config['align variants']:
                self.aligner = Aligner()
            else:
                self.aligner = None
        else:
            self.aligner = None

        if 'report filtered reads' in config:
            self.report_filtered_reads = config['report filtered reads']
        else:
            self.report_filtered_reads = self.verbose

        # initialize data
        self.counts = dict()        # pandas dataframes
        self.counts_file = dict()   # paths to saved counts
        self.filters = None         # dictionary
        self.filter_stats = None    # dictionary
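
A minimal sketch of the config dict this constructor expects, inferred from the keys it reads; anything DataContainer.__init__ itself requires is not shown and the values are illustrative only:

# Hypothetical config for the constructor above.
config = {
    'timepoint': 0,                  # required; coerced with int()
    'align variants': True,          # optional; builds an Aligner if truthy
    'report filtered reads': False,  # optional; defaults to self.verbose
}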
Example No. 2
    def __init__(self, config):
        DataContainer.__init__(self, config)

        try:
            self.timepoint = int(config['timepoint'])
        except KeyError as key:
            raise EnrichError("Missing required config value '%s'" % key,
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value %s" % value, self.name)

        if 'align variants' in config:
            if config['align variants']:
                self.aligner = Aligner()
            else:
                self.aligner = None
        else:
            self.aligner = None

        if 'report filtered reads' in config:
            self.report_filtered_reads = config['report filtered reads']
        else:
            self.report_filtered_reads = self.verbose

        # initialize data
        self.counts = dict()  # pandas dataframes
        self.counts_file = dict()  # paths to saved counts
        self.filters = None  # dictionary
        self.filter_stats = None  # dictionary
Example No. 3
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.conditions = dict()
        self.control = None
        self.use_scores = True
        self.normalize_wt = False

        try:
            if 'normalize wt' in config:
                if config['normalize wt'] is True:
                    self.normalize_wt = True
            for cnd in config['conditions']:
                if not cnd['label'].isalnum():
                    raise EnrichError(
                        "Alphanumeric label required for condition "
                        "'{label}'".format(label=cnd['label']), self.name)
                # assign output base if not present
                for sel_config in cnd['selections']:
                    if 'output directory' not in sel_config:
                        sel_config['output directory'] = self.output_base
                if cnd['label'] not in self.conditions:
                    self.conditions[cnd['label']] = [
                        selection.Selection(x) for x in cnd['selections']
                    ]
                else:
                    raise EnrichError(
                        "Non-unique condition label '{label}'".format(
                            label=cnd['label']), self.name)
                if 'control' in cnd:
                    if cnd['control']:
                        if self.control is None:
                            self.control = self.conditions[cnd['label']]
                        else:
                            raise EnrichError("Multiple control conditions",
                                              self.name)
        except KeyError as key:
            raise EnrichError(
                "Missing required config value {key}".format(key=key),
                self.name)

        all_selections = list()
        for key in self.conditions:
            all_selections.extend(self.conditions[key])
        for dtype in all_selections[0].df_dict:
            if all(dtype in x.df_dict for x in all_selections):
                self.df_dict[dtype] = True
        if len(self.df_dict.keys()) == 0:
            raise EnrichError(
                "No enrichment data present across all selections", self.name)

        # ensure consistency for score usage
        if not all(x.use_scores for x in all_selections):
            self.use_scores = False

        # ensure consistency for wild type normalization
        for sel in all_selections:
            sel.normalize_wt = self.normalize_wt
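
For orientation, a hypothetical 'conditions' layout this constructor would accept, inferred from the keys it reads; the Selection config contents are elided because they are defined elsewhere:

# Hypothetical config fragment: unique alphanumeric labels, at most one
# condition flagged as control, and a list of Selection configs each.
config = {
    'normalize wt': True,
    'conditions': [
        {'label': 'treated', 'control': False, 'selections': [{}, {}]},
        {'label': 'reference', 'control': True, 'selections': [{}]},
    ],
}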
Example No. 4
def records_to_dm(records):
    dc_dict = {}
    for activity, recs in records.items():  # 'recs' avoids shadowing the dict
        fa_array = [e[1] for e in recs]
        fn_array = [e[2] for e in recs]
        threshold = [e[0] for e in recs]
        dc = DataContainer(fa_array, fn_array, threshold, label=activity)
        dc.line_options['color'] = None
        dc_dict[activity] = dc
    return dc_dict
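
A usage sketch under the assumption that each record is a (threshold, fa, fn) triple, which is what the e[0]/e[1]/e[2] indexing above implies; the activity name and values are hypothetical:

records = {
    'Riding': [(0.1, 0.9, 0.05), (0.5, 0.4, 0.20), (0.9, 0.1, 0.60)],
}
dc_dict = records_to_dm(records)
dc_dict['Riding'].fa_label = 'PFA'  # labels are left to the caller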
Example No. 5
    def __init__(self, config):
        DataContainer.__init__(self, config)

        try:
            self.timepoint = int(config['timepoint'])
        except KeyError as key:
            raise EnrichError("Missing required config value '{key}'".format(key=key), 
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value {value}".format(value=value), self.name)

        if 'report filtered reads' in config:
            self.report_filtered = config['report filtered reads']
        else:
            self.report_filtered = False
Example No. 6
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.conditions = dict()
        self.control = None
        self.use_scores = True
        self.normalize_wt = False

        try:
            if 'normalize wt' in config:
                if config['normalize wt'] is True:
                    self.normalize_wt = True
            for cnd in config['conditions']:
                if not cnd['label'].isalnum():
                    raise EnrichError("Alphanumeric label required for condition '{label}'".format(label=cnd['label']), self.name)
                for sel_config in cnd['selections']: # assign output base if not present
                    if 'output directory' not in sel_config:
                        sel_config['output directory'] = self.output_base
                if cnd['label'] not in self.conditions:
                    self.conditions[cnd['label']] = [selection.Selection(x) for x in cnd['selections']]
                else:
                    raise EnrichError("Non-unique condition label '{label}'".format(label=cnd['label']), self.name)
                if 'control' in cnd:
                    if cnd['control']:
                        if self.control is None:
                            self.control = self.conditions[cnd['label']]
                        else:
                            raise EnrichError("Multiple control conditions", self.name)
        except KeyError as key:
            raise EnrichError("Missing required config value {key}".format(key=key), 
                              self.name)

        all_selections = list()
        for key in self.conditions:
            all_selections.extend(self.conditions[key])
        for dtype in all_selections[0].df_dict:
            if all(dtype in x.df_dict for x in all_selections):
                self.df_dict[dtype] = True
        if len(self.df_dict.keys()) == 0:
            raise EnrichError("No enrichment data present across all selections", 
                              self.name)

        # ensure consistency for score usage
        if not all(x.use_scores for x in all_selections):
            self.use_scores = False

        # ensure consistency for wild type normalization
        for sel in all_selections:
            sel.normalize_wt = self.normalize_wt
Example No. 7
    def call_loader(path, logger):
        try:
            # Use os.path.isfile() instead of catching FileNotFoundError,
            # for Python 2 support
            if os.path.isfile(path):
                dc = DataContainer.load(path)
                if (hasattr(dc, "data_container_version")
                        and dc.data_container_version == "2.0"):
                    return dc
                else:
                    logger.error("Error: This type of data container is not "
                                 "supported (data_container_version not "
                                 "found or < 2.0)")
                    DMRenderExit(logger)
            else:
                logger.error("FileNotFoundError: No such file or directory: "
                             "'{}'".format(path))
                DMRenderExit(logger)
        except IOError as e:
            logger.error("IOError: {}".format(str(e)))
            DMRenderExit(logger)

        except UnicodeDecodeError as e:
            logger.error("UnicodeDecodeError: {}\n".format(str(e)))
            DMRenderExit(logger)
Example No. 8
    def _export_records(records, prefix):
        opts = {}
        if (len(records) > 0):
            dc_dict = records_to_dm(records)
            for activity, dc in dc_dict.items():
                dc.activity = activity
                dc.fa_label = prefix
                dc.fn_label = "PMISS"
                save_dm(dc, dm_dir, "{}_{}.dm".format(prefix, activity))
                log(1, "[Info] Plotting {} DET curve for {}".format(prefix, activity))
                opts['title'] = activity
                save_DET(dc, figure_dir, "DET_{}_{}.png".format(prefix, activity), no_ppf, opts)

            mean_label = "{}_mean_byfa".format(prefix)
            dc_agg = DataContainer.aggregate(dc_dict.values(), output_label=mean_label, average_resolution=500)
            dc_agg.activity = "AGGREGATED"
            dc_agg.fa_label = prefix
            dc_agg.fn_label = "PMISS"
            save_dm(dc_agg, dm_dir, "{}.dm".format(mean_label))
            log(1, "[Info] Plotting mean {} curve for {} activities".format(prefix, len(dc_dict.values())))
            save_DET(dc_agg, figure_dir, "DET_{}.png".format(mean_label), no_ppf, opts)
            log(1, "[Info] Plotting combined {} DET curves".format(prefix))
            opts['title'] = "All Activities"
            save_DET(dc_dict.values(), figure_dir, "DET_{}_{}.png".format(prefix, "COMBINED"), no_ppf, opts)
            opts['title'] = "All Activities and Aggregate"
            save_DET(list(dc_dict.values()) + [dc_agg], figure_dir, "DET_{}_{}.png".format(prefix, "COMBINEDAGG"), no_ppf, opts)
Example No. 9
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.conditions = dict()
        self.control = None
        self.use_scores = True

        try:
            for cnd in config['conditions']:
                if not cnd['label'].isalnum():
                    raise EnrichError(
                        "Alphanumeric label required for condition '%s'" %
                        cnd['label'], self.name)
                # assign output base if not present
                for sel_config in cnd['selections']:
                    if 'output directory' not in sel_config:
                        sel_config['output directory'] = self.output_base
                self.conditions[cnd['label']] = [
                    selection.Selection(x) for x in cnd['selections']
                ]
                if cnd['control']:
                    if self.control is None:
                        self.control = self.conditions[cnd['label']]
                    else:
                        raise EnrichError("Multiple control conditions",
                                          self.name)
        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key,
                              self.name)

        all_selections = list()
        for key in self.conditions:
            all_selections.extend(self.conditions[key])
        for dtype in all_selections[0].df_dict:
            if all(dtype in x.df_dict for x in all_selections):
                self.df_dict[dtype] = True
        if len(self.df_dict.keys()) == 0:
            raise EnrichError(
                "No enrichment data present across all selections", self.name)

        for key in self.conditions:
            if any(len(x.timepoints) == 2 for x in self.conditions[key]):
                self.use_scores = False
Example No. 10
    def compute_auc(self, output_dir):
        prefix = ["RFA", "TFA"]
        auc_data = []
        mean_auc = []
        for p in prefix:
            for activity, activity_properties in self.activity_index.items():
                try:
                    dm_data = DataContainer.load(
                        output_dir + "/dm/" + "{}_{}.dm".format(p, activity))
                    auc_data = auc_data + get_auc_new(dm_data, p, activity)
                except Exception:
                    # Ignore the error for protocols that don't compute
                    # TFA metrics
                    pass
        mean_auc = get_auc_mean(auc_data)
        return auc_data, mean_auc
Example No. 11
    def compute_auc(self, output_dir):

        prefix = ["RFA", "TFA"]
        auc_data = []
        mean_auc = []
        for p in prefix:
            for activity, activity_properties in self.activity_index.items():
                try:
                    dm_data = DataContainer.load(
                        output_dir + "/dm/" + "{}_{}.dm".format(p, activity))
                    auc_data = auc_data + get_auc_new(dm_data, p, activity)
                except Exception as E:
                    print(E)
                    print(output_dir + "/dm/" +
                          "{}_{}.dm".format(p, activity) + " DNE")
        mean_auc = get_auc_mean(auc_data)
        return auc_data, mean_auc
Example No. 12
def single_point_dm(fa_point,
                    fn_point,
                    threshold,
                    file_name,
                    label=None,
                    fa_label=None,
                    fn_label=None):
    my_dm = DataContainer(fa_array=[fa_point],
                          fn_array=[fn_point],
                          threshold=[threshold],
                          label=label,
                          fa_label=fa_label,
                          fn_label=fn_label)
    my_dm.validate_array_input()
    my_dm.dump(file_name)
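
A hypothetical call, writing a one-point container to disk; the operating-point values and file name are illustrative only:

single_point_dm(fa_point=0.02,
                fn_point=0.35,
                threshold=0.5,
                file_name='single_point.dm',
                label='my_system',
                fa_label='PFA',
                fn_label='PMISS')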
Example No. 13
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.libraries = dict()
        self.timepoints = list()
        self.use_scores = True
        self.normalize_wt = False
        self.ns_carryover_fn = None
        self.ns_carryover_kwargs = None
        self.use_barcode_variation = False

        try:
            if 'barcodes' in config:
                if 'map file' in config['barcodes']:
                    self.barcode_map = BarcodeMap(
                        config['barcodes']['map file'])
                else:
                    self.barcode_map = None
            else:
                self.barcode_map = None

            libnames = list()
            bcmfiles = list()
            for lib in config['libraries']:
                if 'output directory' not in lib:
                    lib['output directory'] = self.output_base
                libtype = seqlib_type(lib)
                if libtype is None:
                    raise EnrichError("Unrecognized SeqLib config", self.name)
                elif libtype == "BarcodeVariantSeqLib":
                    new = BarcodeVariantSeqLib(lib, barcode_map=self.barcode_map)
                    bcmfiles.append(new.barcode_map.filename)
                else:
                    new = globals()[libtype](lib)

                if new.output_base is None:
                    new.set_output_base(self.output_base)

                if new.timepoint not in self.libraries:
                    self.libraries[new.timepoint] = list()
                self.libraries[new.timepoint].append(new)
                libnames.append(new.name)
            self.timepoints = sorted(self.libraries.keys())

            if len(set(libnames)) != len(libnames):
                raise EnrichError("Non-unique library names", self.name)

            if len(bcmfiles) == len(libnames): # all BarcodeVariant
                if len(set(bcmfiles)) == 1:    # all the same BarcodeMap
                    self.use_barcode_variation = True
                    unify_barcode_maps = False
                    if self.barcode_map is None: # same BarcodeMap specified for all SeqLibs
                        unify_barcode_maps = True
                    elif bcmfiles[0] != self.barcode_map.filename: # all SeqLibs are overriding the Selection BarcodeMap
                        unify_barcode_maps = True
                    else: # this BarcodeMap is being used for all SeqLibs
                        pass
                    if unify_barcode_maps:
                        self.barcode_map = self.libraries[0][0].barcode_map
                        for tp in self.timepoints:
                            for lib in self.libraries[tp]:
                                lib.barcode_map = self.barcode_map

            self.set_filters(config['filters'],
                             {'min count': 0,
                              'min input count': 0,
                              'min rsquared': 0.0,
                              'max barcode variation': None})

            if 'carryover correction' in config:
                if config['carryover correction']['method'] == "nonsense":
                    self.ns_carryover_fn = nonsense_ns_carryover_apply_fn
                    self.ns_carryover_kwargs = {
                        'position': int(config['carryover correction']['position'])}
                # add additional methods here using "elif" blocks
                else:
                    raise EnrichError("Unrecognized nonspecific carryover "
                                      "correction", self.name)

            if 'normalize wt' in config:
                if config['normalize wt'] is True:
                    self.normalize_wt = True

        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key, 
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value %s" % value, self.name)

        if len(self.timepoints) < 2:
            raise EnrichError("Insufficient number of timepoints", 
                              self.name)
        elif len(self.timepoints) == 2:
            self.use_scores = False

        if 0 not in self.timepoints:
            raise EnrichError("Missing timepoint 0", self.name)
        if self.timepoints[0] != 0:
            raise EnrichError("Invalid negative timepoint", self.name)

        # identify what kind of counts data is present in all timepoints
        dtype_counts = list()
        for tp in self.timepoints:
            for lib in self.libraries[tp]:
                dtype_counts.extend(lib.df_dict.keys())
        dtype_counts = Counter(dtype_counts)
        for dtype in dtype_counts:
            if dtype_counts[dtype] == len(config['libraries']):
                self.df_dict[dtype] = True
        if 'barcodes_unmapped' in self.df_dict.keys(): # special case for BarcodeVariantSeqLib
            del self.df_dict['barcodes_unmapped']
        if 'barcodes_low_abundance' in self.df_dict.keys(): # special case for BarcodeVariantSeqLib
            del self.df_dict['barcodes_low_abundance']
        if len(self.df_dict.keys()) == 0:
            raise EnrichError("No count data present across all timepoints", 
                              self.name)
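
A hypothetical top-level config sketch for this constructor, showing only the keys read above; the SeqLib and filter contents are elided:

# Hypothetical config fragment; values are illustrative only.
config = {
    'barcodes': {'map file': 'barcode_map.txt'},
    'libraries': [{}, {}],  # SeqLib configs, each with a 'timepoint' (elided)
    'filters': {'min count': 10},
    'carryover correction': {'method': 'nonsense', 'position': 55},
    'normalize wt': True,
}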
Example No. 14
def evaluate_input(args):
    """This function parse and evaluate the argument from command line
        interface,
    it returns the list of DM files loaded with also potential custom plot and
        lines options provided.
    The functions parse the input argument and the potential custom options
        arguments (plot and lines).

    It first infers the type of input provided. The following 3 input type are
        supported:
        - type 1: A .txt file containing a pass of .dm file per lines
        - type 2: A single .dm path
        - type 3: A custom list of pairs of dictionnaries (see the input help
            from the command line parser)

    Then it loads custom (or defaults if not provided) plot and lines options
        per DM file.

    Args:
        args (argparse.Namespace): the result of the call of parse_args() on
            the ArgumentParser object

    Returns:
        Result (tuple): A tuple containing
            - DM_list (list): list of DM objects
            - opts_list (list): list of dictionnaries for the lines options
            - plot_opts (dict): dictionnary of plot options
    """
    def call_loader(path, logger):
        try:
            # Python2 support
            if os.path.isfile(path):
                dc = DataContainer.load(path)
                if hasattr(dc, "data_container_version") and \
                   dc.data_container_version == "2.0":
                    return dc
                else:
                    logger.error("Error: This type of data container is not \
                        supported (data_container_version not found or < 2.0)")
                    DMRenderExit(logger)
            else:
                logger.error("FileNotFoundError: No such file or directory: '\
                    {}'".format(path))
                DMRenderExit(logger)
        except IOError as e:
            logger.error("IOError: {}".format(str(e)))
            DMRenderExit(logger)

        except UnicodeDecodeError as e:
            logger.error("UnicodeDecodeError: {}\n".format(str(e)))
            DMRenderExit(logger)

    logger = logging.getLogger('DMlog')
    DM_list = list()
    # Case 1: text file containing one path per line
    if args.input.endswith('.txt'):
        logger.debug("Input of type 1 detected")
        input_type = 1
        if os.path.isfile(args.input):
            with open(args.input) as f:
                fp_list = f.read().splitlines()
        else:
            logger.error("FileNotFoundError: No such file or directory: '{}'\
                ".format(args.input))
            DMRenderExit(logger)

        for dm_file_path in fp_list:
            label = dm_file_path
            # We handle a potential label provided
            if ':' in dm_file_path:
                dm_file_path, label = dm_file_path.rsplit(':', 1)

            dm_obj = call_loader(dm_file_path, logger)
            dm_obj.path = dm_file_path
            dm_obj.label = label if dm_obj.label is None else dm_obj.label
            dm_obj.show_label = True
            DM_list.append(dm_obj)

    # Case 2: One dm pickled file
    elif args.input.endswith('.dm'):
        logger.debug("Input of type 2 detected")
        input_type = 2
        dm_obj = call_loader(args.input, logger)
        dm_obj.path = args.input
        dm_obj.label = args.input if dm_obj.label is None else dm_obj.label
        dm_obj.show_label = True
        DM_list = [dm_obj]

    # Case 3: String containing a list of input with their metadata
    elif args.input.startswith('[[') and args.input.endswith(']]'):
        logger.debug("Input of type 3 detected")
        input_type = 3
        try:
            input_list = literal_eval(args.input)
            for dm_data, dm_opts in input_list:
                logger.debug("dm_data: {}".format(dm_data))
                logger.debug("dm_opts: {}".format(dm_opts))
                dm_file_path = dm_data['path']
                dm_obj = call_loader(dm_file_path, logger)
                dm_obj.path = dm_file_path
                dm_obj.label = dm_data['label'] if dm_data['label'] is not \
                    None else dm_obj.label
                dm_obj.show_label = dm_data['show_label']
                dm_obj.line_options = dm_opts
                dm_obj.line_options['label'] = dm_obj.label
                DM_list.append(dm_obj)

        except ValueError as e:
            if not all([len(x) == 2 for x in input_list]):
                logger.error("ValueError: Invalid input format. All sub-lists \
                    must be a pair of two dictionaries.\n-> {}".format(str(e)))
            else:
                logger.error("ValueError: {}".format(str(e)))
            DMRenderExit(logger)

        except SyntaxError as e:
            logger.error("SyntaxError: The input provided is invalid.\n-> {}\
                ".format(str(e)))
            DMRenderExit(logger)

    else:
        logger.error("The input type does not match any of the following \
            inputs:\n- .txt file containing one file path per line\n- .dm file\
            \n- a list of pair [{'path':'path/to/dm_file','label':str,'\
            show_label':bool}, **{any matplotlib.lines.Line2D properties}].\n")
        DMRenderExit(logger)

    # Assertions: the fa_labels and fn_labels MUST be identical across files
    fa_label = set([x.fa_label for x in DM_list])
    fn_label = set([x.fn_label for x in DM_list])
    assert (len(fa_label) == 1), \
        "Error: DM files have mixed FA_labels {}".format(fa_label)
    assert (len(fn_label) == 1), \
        "Error: DM files have mixed FN_labels {}".format(fn_label)

    if (args.aggregate is not None):
        logger.debug("Creating aggregated Line")
        try:
            dm_data, dm_opts = literal_eval(args.aggregate)
            dm_obj = DataContainer.aggregate(DM_list,
                                             output_label="TFA_mean_byfa",
                                             average_resolution=500)
            dm_obj.label = dm_data['label'] if dm_data['label'] is not None \
                else dm_obj.label
            dm_obj.activity = dm_obj.label
            dm_obj.fa_label = fa_label.pop()
            dm_obj.fn_label = fn_label.pop()
            dm_obj.show_label = dm_data['show_label']
            dm_obj.line_options = dm_opts
            dm_obj.line_options['label'] = dm_obj.label
            DM_list.append(dm_obj)

            if dm_data['path'] is not None:
                fname = "{}/{}".format(args.outputFolder, dm_data['path'])
                logger.debug("Writing aggregated Line to {}".format(fname))
                dm_obj.dump(fname)

        except ValueError as e:
            logger.error("ValueError: The aggrgate option had a value error {}\
                ".format(str(e)))
            DMRenderExit(logger)

        except SyntaxError as e:
            logger.error("SyntaxError: The aggregate option provided is \
                invalid.\n-> {}".format(str(e)))
            DMRenderExit(logger)

    # *-* Options Processing *-*

    # General plot options
    if not args.plotOptionJsonFile:
        logger.info("Generating the default plot options...")
        plot_opts = Render.gen_default_plot_options(args.plotType,
                                                    DM_list[0].fa_label,
                                                    DM_list[0].fn_label,
                                                    plot_title=args.plotTitle)

    else:
        logger.info("Loading of the plot options from the json config file...")
        if os.path.isfile(args.plotOptionJsonFile):
            with open(args.plotOptionJsonFile, 'r') as f:
                plot_opts = json.load(f)
            validate_plot_options(plot_opts)
        else:
            logger.error("FileNotFoundError: No such file or directory: '{}'\
                ".format(args.plotOptionJsonFile))
            DMRenderExit(logger)

    # line options
    if args.lineOptionJsonFile and input_type != 3:
        logger.info("Loading of the lines options from the json config file \
            and overriding data container line settings...")
        if os.path.isfile(args.lineOptionJsonFile):

            with open(args.lineOptionJsonFile, 'r') as f:
                opts_list = json.load(f)

            if len(opts_list) != len(DM_list):
                print("ERROR: the number of the line options is different \
                    with the number of the DM objects: ({} < {})".format(
                    len(opts_list), len(DM_list)))
                DMRenderExit(logger)
            else:
                for dm, line_options in zip(DM_list, opts_list):
                    dm.line_options = line_options
        else:
            logger.error("FileNotFoundError: No such file or directory: '{}'\
                ".format(args.lineOptionJsonFile))
            DMRenderExit(logger)

    if args.confidenceInterval:
        plot_opts['confidence_interval'] = True

    return DM_list, plot_opts
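
For reference, the three accepted forms of args.input sketched as strings; the paths and labels are hypothetical:

# Type 1: a .txt file listing one .dm path per line (optionally "path:label")
args_input_type1 = "dm_files.txt"
# Type 2: a single pickled .dm file
args_input_type2 = "results/RFA_Riding.dm"
# Type 3: a literal list of [data_dict, line_options_dict] pairs
args_input_type3 = ("[[{'path': 'a.dm', 'label': 'sysA', 'show_label': True},"
                    " {'color': 'red'}]]")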
Example No. 15
    def run(self):
        agstart = time.time()
        for i in xrange(self.no_sims):
            logging.info("Going for simulation %d"%(i+1))
            gc.collect()
            run_id = str(uuid4())

            with DataContainer(self.config,run_id,self.aggregate_id) as dc:
                p = Progress(self.config['model']['no_steps'])

                model_class = None
                if(self.market_type == 1):
                    logging.info("Using default Market")
                    model_class = Market
                elif(self.market_type == 2):
                    logging.info("Using ShuffleIRSMarket")
                    model_class = ShuffleIRSMarket
                elif(self.market_type == 3):
                    logging.info("Using SortedIRSMarket")
                    model_class = SortedIRSMarket
                elif(self.market_type == 4):
                    logging.info("Using RandomSortedIRSMarket")
                    model_class = SortedRandomIRSMarket
                elif(self.market_type == 5):
                    logging.info("Using RandomShuffleIRSMarket")
                    model_class = ShuffleRandomIRSMarket
                elif(self.market_type == 6):
                    logging.info("Using ConstantRandomShuffleIRSMarket")
                    model_class = ConstShuffleIRSMarket
                elif(self.market_type == 7):
                    logging.info("Using quick CRS-IRS-Mkt")
                    model_class = sim
                else:
                    raise "No such market type"

                p.start()
                start = time.time()
                with model_class(self.config['model'],dc,p.update) as m:
                    m.run()

                t = time.time()-start
                p.finish()

                print ""
                logging.info("Run took %f seconds"%t)

                if(self.config['analysis']['do_analysis']):
                    start = time.time()
                    self.do_analysis(dc,run_id)
                    t = time.time()-start
                    logging.info("Analysis took %f seconds"%t)

                if(self.save_data):
                    start = time.time()
                    dc.save_data()
                    t = time.time()-start
                    logging.info("Saving data took %f seconds"%t)

            gc.collect()
            print ""
            print ""

        gc.collect()
        dt = (time.time() - agstart) / 60
        logging.info("Experiment took %f minutes"%dt)

        if(self.config['aggregate']['do_aggregate'] and self.save_data):
            start = time.time()
            self.do_aggregate(dc,run_id)
            logging.info('Aggregation took %f seconds'%(time.time()-start))
Example No. 16
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.libraries = dict()
        self.timepoints = list()

        try:
            if 'barcodes' in config:
                if 'map file' in config['barcodes']:
                    self.barcode_map = BarcodeMap(
                        config['barcodes']['map file'])
                else:
                    self.barcode_map = None
            else:
                self.barcode_map = None

            libnames = list()
            for lib in config['libraries']:
                if 'output directory' not in lib:
                    lib['output directory'] = self.output_base
                libtype = seqlib_type(lib)
                if libtype is None:
                    raise EnrichError("Unrecognized SeqLib config", self.name)
                elif libtype == "BarcodeVariantSeqLib":
                    new = BarcodeVariantSeqLib(lib, barcode_map=self.barcode_map)
                else:
                    new = globals()[libtype](lib)

                if new.output_base is None:
                    new.set_output_base(self.output_base)

                if new.timepoint not in self.libraries:
                    self.libraries[new.timepoint] = list()
                self.libraries[new.timepoint].append(new)
                libnames.append(new.name)
            self.timepoints = sorted(self.libraries.keys())

            if len(set(libnames)) != len(libnames):
                raise EnrichError("Non-unique library names", self.name)

            self.set_filters(config['filters'],
                             {'min count': 0,
                              'min input count': 0,
                              'min rsquared': 0.0,
                              'max barcode variation': None})

            if 'carryover correction' in config:
                if config['carryover correction']['method'] == "nonsense":
                    self.ns_carryover_fn = nonsense_ns_carryover_apply_fn
                    self.ns_carryover_kwargs = {
                        'position': int(config['carryover correction']['position'])}
                # add additional methods here using "elif" blocks
                else:
                    raise EnrichError("Unrecognized nonspecific carryover "
                                      "correction", self.name)
            else:
                self.ns_carryover_fn = None
                self.ns_carryover_kwargs = None

        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key, 
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value %s" % value, self.name)

        if len(self.libraries.keys()) < 2:
            raise EnrichError("Insufficient number of timepoints", 
                              self.name)

        if 0 not in self.timepoints:
            raise EnrichError("Missing timepoint 0", self.name)
        if self.timepoints[0] != 0:
            raise EnrichError("Invalid negative timepoint", self.name)

        # identify what kind of counts data is present in all timepoints
        dtype_counts = list()
        for tp in self.timepoints:
            for lib in self.libraries[tp]:
                dtype_counts.extend(lib.counts.keys())
        dtype_counts = Counter(dtype_counts)
        for dtype in dtype_counts:
            if dtype_counts[dtype] == len(config['libraries']):
                self.df_dict[dtype] = True
        if 'barcodes_unmapped' in self.df_dict.keys(): # special case for BarcodeVariantSeqLib
            del self.df_dict['barcodes_unmapped']
        if len(self.df_dict.keys()) == 0:
            raise EnrichError("No count data present across all timepoints", 
                              self.name)

        try:
            if 'correction' in config:
                if config['correction']['method'] == "stop":
                    if not self.libraries[0][0].is_coding():
                        raise EnrichError("Invalid correction method for "
                                          "noncoding sequences", self.name)
                    else:
                        config['correction']['length percentile'] # must exist
                        self.correction = config['correction']
            else:
                self.correction = None
        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key, self.name)
Example No. 17
	def initialize(self):
		DataContainer.initialize(self)
		self.__style = self.get_style()
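		# clear the STYLE_WRAP bit from the inherited style mask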
		self.__style &= ~ListView.STYLE_WRAP
		self.set_style(self.__style)
Example No. 18
def do_run(steps,
           no_banks,
           threshold,
           max_tenure,
           max_irs_value,
           avalanche_fraction=0.9):
    #steps = 10000
    save = False
    save_risk = False
    save_risk_avalanche_time_series = False
    save_dist = False
    save_giant_component = False
    save_avalanche_progression = False
    save_critical_info = False
    save_avalanche_tree = False
    save_degree_distribution = False
    no_connection_scatter_moments = 0
    connection_scatter_moments = np.random.randint(
        0, steps, no_connection_scatter_moments)

    seed = np.random.randint(0, 1000)
    dcconfig = {
        'model': {
            'no_banks': no_banks,
            'no_steps': steps,
            'threshold': threshold,
            'sigma': 1,
            'max_irs_value': max_irs_value,
            'irs_threshold': -1,
            'dissipation': 0.0,
            'max_tenure': max_tenure
        },
        'analysis': {
            'data_to_save': ['defaults']
        },
        'file_root': './simulation_data/',
        'market_type': 7,
        'seed': seed
    }

    measure_no_steps = 2 * dcconfig['model']['max_tenure']

    ###########################################################################
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'],
            dc,
            p.update,
            save_risk,
            save_dist,
            connection_scatter_moments,
            seed,
            avalanche_fraction=avalanche_fraction)
    s.save_degree_distribution = save_degree_distribution
    if (s.save_degree_distribution):
        s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
        s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id

    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)

    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)

    if (save_giant_component): s.giant_components = np.zeros(s.no_steps)
    ###########################################################################

    start = time.time()
    p.start()
    tme, size = s.run()
    print
    p.finish()

    defaulting_bank = s.defaulting_bank_no
    start_at = tme - measure_no_steps + 1

    print "Large enough avalanche found at %d of size %d" % (tme, size)

    print
    print "Run took %d seconds" % (time.time() - start)
    print
    print "Going for the analysis"

    ###########################################################################
    ## Actual stuff that's needed
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'],
            dc,
            p.update,
            save_risk,
            save_dist,
            connection_scatter_moments,
            seed,
            start_at,
            defaulting_bank,
            avalanche_fraction=avalanche_fraction)

    nb = dcconfig['model']['no_banks']
    s.measured_balances = np.zeros((measure_no_steps, nb))
    s.measured_gross_balances = np.zeros((measure_no_steps, nb))
    s.degrees = np.zeros((measure_no_steps, nb))
    s.no_irs = np.zeros((measure_no_steps, nb))
    #s.giant_component = []
    s.defaulted_nodes = []
    s.irs_pb = []
    s.network = np.zeros((nb, nb))
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)

    #################
    s.save_degree_distribution = save_degree_distribution
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component): s.giant_components = np.zeros(s.no_steps)
    ###########################################################################

    start = time.time()
    p.start()
    tme, size = s.run()
    p.finish()
    print
    print "Large enough avalanche found at %d of size %d" % (tme, size)

    if s.save_avalanche_progression:
        print "Saving avalanche progression"
        file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.avalanche_progressions, fp)
            pickle.dump(dcconfig, fp)

    if s.collect_critical_info:
        print "Critical info"
        file_path = './simulation_data/critical/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.critical_info, fp)
            pickle.dump(s.max_default_size_t.tolist(), fp)
            if (s.save_giant_component):
                pickle.dump(s.giant_components.tolist(), fp)
            pickle.dump(dcconfig, fp)

    if len(connection_scatter_moments) > 0:
        print "Connection Scatters"
        file_path = './simulation_data/connection_scatters/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.connection_scatters, fp)

    if save_dist:
        file_path = './simulation_data/dists/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.trials, fp)
            pickle.dump(dcconfig['model']['no_banks'], fp)

    if (True):
        os.makedirs("./simulation_data/large_avalanche_data/%s" %
                    dc.aggregate_id)
        print "Saving stuff"
        file_path = './simulation_data/large_avalanche_data/%s/degrees.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.degrees.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/no_irs.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.no_irs.tolist(), fp)
            pickle.dump(s.irs_pb, fp)

        file_path = './simulation_data/large_avalanche_data/%s/balances.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.measured_balances.tolist(), fp)
            pickle.dump(s.measured_gross_balances.tolist(), fp)

        #file_path = './simulation_data/large_avalanche_data/%s/gc.bin'%dc.aggregate_id
        #with file(file_path,'wb') as fp:
        #    pickle.dump(s.giant_component,fp)

        file_path = './simulation_data/large_avalanche_data/%s/network.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.network.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/defaulted.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.defaulted_nodes, fp)

        file_path = './simulation_data/large_avalanche_data/%s/irs_data.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.irs_creations.tolist(), fp)
            pickle.dump(s.irs_removals.tolist(), fp)

        dcconfig['failed_bank'] = s.defaulting_bank_no
        file_path = './simulation_data/large_avalanche_data/%s/config.json' % dc.aggregate_id
        with open(file_path, 'w') as fp:
            json.dump(dcconfig, fp, indent=4)

    print dc.aggregate_id
Example No. 19

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    c = {'sigma' : 1,
    'no_banks' : 30,
    'no_steps' : 200,
    'irs_threshold' : 15,
    'max_irs_value' : 20,
    'max_tenure' : 80,
    'no_sims' : 1,
    'threshold' : 4}

    mkt = ShuffleIRSMarket(c,
                DataContainer({'file_root':'./test','model':{'no_steps':1}},str(uuid4()),str(uuid4())),
                None)


    banks = mkt.banks
    b1 = banks[0]
    b1.__balance__ = -5
    b2 = banks[1]
    b2.__balance__ = 5
    b3 = banks[2]
    b3.__balance__ = 3

    b1.set_dirty()
    b2.set_dirty()
    b3.set_dirty()
Example No. 20
            'no_steps': steps,
            'threshold': 10,
            'sigma': 1,
            'max_irs_value': 7,  #4,
            'irs_threshold': -1,
            'dissipation': 0.0,
            'max_tenure': 400
        },
        'analysis': {
            'data_to_save': ['defaults']
        },
        'file_root': './simulation_data/',
        'market_type': 7
    }

    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
            connection_scatter_moments)
    s.save_degree_distribution = save_degree_distribution
    if (s.save_degree_distribution):
        s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
        s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    s.save_degree_on_default = save_degree_on_default
Example No. 21
        cur_config = copy.deepcopy(dcconfig)

        cur_config['model']['no_banks'] = no_banks
        cur_config['model']['max_irs_value'] = max_irs_value
        cur_config['model']['max_tenure'] = tenure
        cur_config['model']['threshold'] = threshold

        aggregate_id = str(uuid4())
        for i in xrange(no_reps):
            print "Startin run %d of %d" % (cnt, nosims)

            run_id = str(uuid4())
            p = Progress(steps)
            start = time.time()

            with DataContainer(cur_config, run_id, aggregate_id) as dc:
                with sim(cur_config['model'], dc, p.update, save_risk,
                         False) as cursim:
                    config_sim(cursim)
                    p.start()
                    cursim.run()
                    p.finish()

                    if (save):
                        dc.save_defaults()
                        dc.save_run()

                    if cursim.save_degree_distribution:
                        directory = './simulation_data/k/irs_value_%s' % max_irs_value
                        file_path = '%s/%s_%s.bin' % (directory,
                                                      dc.aggregate_id, i)
Example No. 22
 def initialize(self):
     DataContainer.initialize(self)
     self.__style = self.get_style()
     self.__style &= ~ListView.STYLE_WRAP
     self.set_style(self.__style)
Example No. 23
    def __init__(self, config):
        DataContainer.__init__(self, config)
        self.libraries = dict()
        self.timepoints = list()

        try:
            if 'barcodes' in config:
                if 'map file' in config['barcodes']:
                    self.barcode_map = BarcodeMap(
                        config['barcodes']['map file'])
                else:
                    self.barcode_map = None
            else:
                self.barcode_map = None

            libnames = list()
            for lib in config['libraries']:
                if 'output directory' not in lib:
                    lib['output directory'] = self.output_base
                libtype = seqlib_type(lib)
                if libtype is None:
                    raise EnrichError("Unrecognized SeqLib config", self.name)
                elif libtype == "BarcodeVariantSeqLib":
                    new = BarcodeVariantSeqLib(lib,
                                               barcode_map=self.barcode_map)
                else:
                    new = globals()[libtype](lib)

                if new.output_base is None:
                    new.set_output_base(self.output_base)

                if new.timepoint not in self.libraries:
                    self.libraries[new.timepoint] = list()
                self.libraries[new.timepoint].append(new)
                libnames.append(new.name)
            self.timepoints = sorted(self.libraries.keys())

            if len(set(libnames)) != len(libnames):
                raise EnrichError("Non-unique library names", self.name)

            self.set_filters(
                config['filters'], {
                    'min count': 0,
                    'min input count': 0,
                    'min rsquared': 0.0,
                    'max barcode variation': None
                })

            if 'carryover correction' in config:
                if config['carryover correction']['method'] == "nonsense":
                    self.ns_carryover_fn = nonsense_ns_carryover_apply_fn
                    self.ns_carryover_kwargs = {
                        'position':
                        int(config['carryover correction']['position'])
                    }
                # add additional methods here using "elif" blocks
                else:
                    raise EnrichError(
                        "Unrecognized nonspecific carryover correction",
                        self.name)
            else:
                self.ns_carryover_fn = None
                self.ns_carryover_kwargs = None

        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key,
                              self.name)
        except ValueError as value:
            raise EnrichError("Invalid parameter value %s" % value, self.name)

        if len(self.libraries.keys()) < 2:
            raise EnrichError("Insufficient number of timepoints", self.name)

        if 0 not in self.timepoints:
            raise EnrichError("Missing timepoint 0", self.name)
        if self.timepoints[0] != 0:
            raise EnrichError("Invalid negative timepoint", self.name)

        # identify what kind of counts data is present in all timepoints
        dtype_counts = list()
        for tp in self.timepoints:
            for lib in self.libraries[tp]:
                dtype_counts.extend(lib.counts.keys())
        dtype_counts = Counter(dtype_counts)
        for dtype in dtype_counts:
            if dtype_counts[dtype] == len(config['libraries']):
                self.df_dict[dtype] = True
        # special case for BarcodeVariantSeqLib
        if 'barcodes_unmapped' in self.df_dict.keys():
            del self.df_dict['barcodes_unmapped']
        if len(self.df_dict.keys()) == 0:
            raise EnrichError("No count data present across all timepoints",
                              self.name)

        try:
            if 'correction' in config:
                if config['correction']['method'] == "stop":
                    if not self.libraries[0][0].is_coding():
                        raise EnrichError(
                            "Invalid correction method for "
                            "noncoding sequences", self.name)
                    else:
                        config['correction']['length percentile']  # must exist
                        self.correction = config['correction']
            else:
                self.correction = None
        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key,
                              self.name)