Example #1
0
    def setUp(self):
        """
        Set up the database with some orders ready for a credit.

        Builds a test logger and a bottle app loaded with the test config,
        ensures the database exists, clears the orders and credits tables,
        inserts a deterministic grid of test orders and precalculates the
        liquidity and reward figures the tests assert against.
        :return:
        """
        # Build the tests Logger (plain stream handler, bare-message format)
        self.log = logging.Logger('Tests')
        stream = logging.StreamHandler()
        formatter = logging.Formatter(fmt='%(message)s')
        stream.setFormatter(formatter)
        self.log.addHandler(stream)
        # set us up a bottle application with correct config
        self.app = bottle.Bottle()
        config.load(self.app, self.log, join('tests', 'config'), log_output=False)
        # build the database if it doesn't exist
        database.build(self.app, self.log, log_output=False)
        # clear any existing orders/credits in the database so every run
        # starts from a known-empty state
        conn = database.get_db(self.app)
        c = conn.cursor()
        c.execute("DELETE FROM orders")
        c.execute("DELETE FROM credits")
        conn.commit()
        # create test data
        # 5 test users each with 100 NBT on each exchange/pair/side/rank
        # (2 units x 2 sides x 2 ranks = 8 orders per user, 40 orders total;
        # order_id is a simple running counter starting at 1)
        # NOTE(review): the '%s' placeholders imply a pyformat/format
        # paramstyle driver (e.g. psycopg2) -- confirm against database.get_db
        self.test_data = {}
        test_order_number = 1
        for i in xrange(0, 5):
            for unit in ['btc', 'ppc']:
                for side in ['ask', 'bid']:
                    for rank in ['rank_1', 'rank_2']:
                        c.execute("INSERT INTO orders (key,rank,order_id,order_amount,"
                                  "side,exchange,unit,credited) VALUES "
                                  "(%s,%s,%s,%s,%s,%s,%s,%s)",
                                  ('TEST_USER_{}'.format(i + 1), rank, test_order_number,
                                   100, side, 'test_exchange', unit, 0))
                        test_order_number += 1
        conn.commit()
        conn.close()

        # setup test data for test_get_total_liquidity
        # get the orders from the database
        conn = database.get_db(self.app)
        c = conn.cursor()
        c.execute("SELECT * FROM orders")
        orders = c.fetchall()
        # get the liquidity as calculated by the main function
        self.total_liquidity = credit.get_total_liquidity(self.app, orders)

        # setup data for test_calculate_rewards
        # target for btc is 2500. total for btc is 2000.0 which is 0.8 of target
        # so already reward for btc is 0.02 instead of 0.025
        # ask and bid are 50:50 so each gets 0.01. rank_1 ratio is 1.0 and rank_2 is 0 for
        # both.
        #
        # target for ppc is 1500. total for ppc is 2000.0 so full reward of 0.0250
        # ask is 0.6 * 0.025 = 0.015
        # bid is 0.4 * 0.025 = 0.010
        # ask rank_1 is 1
        # bid rank_1 is 0.8 * 0.010 = 0.008
        # bid rank_2 is 0.2 * 0.010 = 0.002
        self.rewards = {'test_exchange': {'btc': {'ask': {'rank_1': 0.01,
                                                          'rank_2': 0.0},
                                                  'bid': {'rank_1': 0.01,
                                                          'rank_2': 0.0}
                                                  },

                                          'ppc': {'ask': {'rank_1': 0.015,
                                                          'rank_2': 0.0},
                                                  'bid': {'rank_1': 0.008,
                                                          'rank_2': 0.002}}}}
Example #2
0
    app.install(bottle_pgsql.Plugin('dbname={} user={} password={} '
                                    'host={} port={}'.format(url.path[1:],
                                                             url.username,
                                                             url.password,
                                                             url.hostname,
                                                             url.port)))
else:
    app.install(bottle_pgsql.Plugin('dbname={} user={} password={} '
                                    'host={} port={}'.format(app.config['db.name'],
                                                             app.config['db.user'],
                                                             app.config['db.pass'],
                                                             app.config['db.host'],
                                                             app.config['db.port'])))

# Create the database if one doesn't exist
database.build(app, log)

# Create the Exchange wrapper objects.
# Only exchanges named in the 'exchanges' config list get a wrapper;
# keys of `wrappers` are the config names, values are wrapper instances.
wrappers = {}
if 'bittrex' in app.config['exchanges']:
    wrappers['bittrex'] = src.exchanges.Bittrex()
if 'bter' in app.config['exchanges']:
    wrappers['bter'] = src.exchanges.BTER()
if 'ccedk' in app.config['exchanges']:
    wrappers['ccedk'] = src.exchanges.CCEDK()
if 'poloniex' in app.config['exchanges']:
    wrappers['poloniex'] = src.exchanges.Poloniex()
if 'test_exchange' in app.config['exchanges']:
    # test_exchange is a stub wrapper used by the test suite
    wrappers['test_exchange'] = src.exchanges.TestExchange()

# save the start time of the server for reporting up-time
Example #3
0
def _collect_ion_hits(spectrum, mz_mapping, boundaries, matched_masses_b, matched_masses_y):
    '''Gather the b and y ion kmer hits for a single spectrum.

    :param spectrum: the spectrum whose m/z values are matched
    :param mz_mapping: mapping of m/z value -> index into boundaries
    :param boundaries: the list of mass boundary pairs
    :param matched_masses_b: mapping of hashable boundary -> b ion kmer hits
    :param matched_masses_y: mapping of hashable boundary -> y ion kmer hits

    :returns: (b_hits, y_hits) lists of kmer hits
    '''
    b_hits, y_hits = [], []
    for mz in spectrum.spectrum:
        # get the correct boundary for this m/z and make it a usable dict key
        b = hashable_boundaries(boundaries[mz_mapping[mz]])

        if b in matched_masses_b:
            b_hits += matched_masses_b[b]

        if b in matched_masses_y:
            y_hits += matched_masses_y[b]
    return b_hits, y_hits


def id_spectra(
    spectra_files: list, 
    database_file: str, 
    verbose: bool = True, 
    min_peptide_len: int = 5, 
    max_peptide_len: int = 20, 
    peak_filter: int = 0, 
    relative_abundance_filter: float = 0.0,
    ppm_tolerance: int = 20, 
    precursor_tolerance: int = 10, 
    digest: str = '',
    cores: int = 1,
    n: int = 5,
    DEBUG: bool = False, 
    truth_set: str = '', 
    output_dir: str = ''
) -> dict:
    '''Load in all the spectra and try to create an alignment for every spectrum

    :param spectra_files: file names of input spectra
    :type spectra_files: list
    :param database_file: file name of the fasta database
    :type database_file: str
    :param verbose: print progress to the console.
        (default is True)
    :type verbose: bool
    :param min_peptide_len: the minimum length alignment to create
        (default is 5)
    :type min_peptide_len: int
    :param max_peptide_len: the maximum length alignment to create
        (default is 20)
    :type max_peptide_len: int
    :param peak_filter: If set to a number, this metric is used over the relative abundance filter.
        The most abundant X peaks to use in the alignment.
        (default is 0)
    :type peak_filter: int
    :param relative_abundance_filter: If peak_filter is set, this parameter is ignored. The
        relative abundance threshold (in percent as a decimal) a peak must be of the total
        intensity to be used in the alignment.
        (default is 0.0)
    :type relative_abundance_filter: float
    :param ppm_tolerance: the parts per million error allowed when trying to match masses
        (default is 20)
    :type ppm_tolerance: int
    :param precursor_tolerance: the parts per million error allowed when trying to match
        a calculated precursor mass to the observed precursor mass
        (default is 10)
    :type precursor_tolerance: int
    :param digest: the type of digest used in the sample preparation. If left blank,
        a digest-free search is performed.
        (default is '')
    :type digest: str
    :param cores: the number of cores allowed to be used when running the program. If a number
        provided is greater than the number of cores available, the maximum number of
        cores is used.
        (default is 1)
    :type cores: int
    :param n: the number of alignments to keep per spectrum.
        (default is 5)
    :type n: int
    :param DEBUG: DEVELOPMENT USE ONLY. Used only for timing of modules.
        (default is False)
    :type DEBUG: bool
    :param truth_set: the path to a json file of the desired alignments to make for each spectrum.
        The format of the file is {spectrum_id: {'sequence': str, 'hybrid': bool, 'parent': str}}.
        If left an empty string, the program proceeds as normal. Otherwise results of the analysis
        will be saved in the file 'fall_off.json' saved in the output directory specified.
        (default is '')
    :type truth_set: str
    :param output_dir: the full path to the output directory to save all output files.
        (default is '')
    :type output_dir: str

    :returns: alignments for all spectra saved in the form {spectrum.id: Alignments}
    :rtype: dict
    '''

    DEV = False
    truth = None

    # for dev use only. If a truth set is passed in, we can check where results
    # drop off. 
    if is_json(truth_set) and is_file(truth_set):
        DEV = True
        print(
            '''
DEV set to True. 
Tracking when correct answer falls off. 
Results are stored in a json named 'fall_off.json' in the specified output directory
File will be of the form

    {
        spectrum_id: {
            hybrid: bool, 
            truth_sequence: str, 
            fall_off_operation: str, 
        }
    }
            '''
        )
        # load in the truth set; use a context manager so the file handle is
        # closed promptly (the old open() call leaked it)
        with open(truth_set, 'r') as truth_file:
            truth = json.load(truth_file)

    fall_off = None

    # build/load the database
    verbose and print('Loading database...')
    db = database.build(database_file)
    verbose and print('Done')

    # load all of the spectra
    verbose and print('Loading spectra...')
    spectra, boundaries, mz_mapping = preprocessing_utils.load_spectra(
        spectra_files, 
        ppm_tolerance,
        peak_filter=peak_filter, 
        relative_abundance_filter=relative_abundance_filter
    )
    verbose and print('Done')

    # get the boundary -> kmer mappings for b and y ions
    matched_masses_b, matched_masses_y, db = merge_search.match_masses(boundaries, db, max_peptide_len)

    # keep track of the alignment made for every spectrum
    results = {}

    if DEV:
        # shared (process-safe) containers so worker processes can record
        # fall-off information; also used unchanged on the single-core path
        fall_off = mp.Manager().dict()
        truth = mp.Manager().dict(truth)

    # if we only get 1 core, don't do the multiprocessing bit
    if cores == 1:
        # go through and id all spectra
        for i, spectrum in enumerate(spectra):

            print(f'Creating alignment for spectrum {i+1}/{len(spectra)} [{to_percent(i+1, len(spectra))}%]', end='\r')

            # get b and y hits
            b_hits, y_hits = _collect_ion_hits(
                spectrum, mz_mapping, boundaries, matched_masses_b, matched_masses_y
            )

            # only time the final spectrum when module timing is requested
            is_last = DEBUG and i == len(spectra) - 1

            # pass it into id_spectrum
            results[spectrum.id] = id_spectrum(
                spectrum, 
                db, 
                b_hits, 
                y_hits, 
                ppm_tolerance, 
                precursor_tolerance,
                n,
                digest_type=digest,
                truth=truth, 
                fall_off=fall_off, 
                is_last=is_last
            )

    else:

        print('Initializing other processors...')
        results = mp.Manager().dict()

        # NOTE: fall_off and truth were already wrapped in manager dicts above
        # when DEV is set; the old code redundantly re-created them here

        # start up processes and queue for parallelizing things
        q = mp.Manager().Queue()
        num_processes = cores
        ps = [
            mp.Process(
                target=mp_id_spectrum, 
                args=(q, copy.deepcopy(db), results, fall_off, truth)
            ) for _ in range(num_processes) 
        ]

        # start each of the processes
        for p in ps:
            p.start()
        print('Done.')

        # go through and queue all spectra for the workers
        for spectrum in spectra:
            # get b and y hits
            b_hits, y_hits = _collect_ion_hits(
                spectrum, mz_mapping, boundaries, matched_masses_b, matched_masses_y
            )

            # create a named tuple to put in the work queue
            o = MPSpectrumID(
                b_hits, 
                y_hits, 
                spectrum, 
                ppm_tolerance, 
                precursor_tolerance, 
                n, 
                digest
            )

            q.put(o)

        # report progress until the workers have handled every spectrum
        while len(results) < len(spectra):
            print(f'\rCreating an alignment for {len(results)}/{len(spectra)} [{to_percent(len(results), len(spectra))}%]', end='')
            time.sleep(1)

        # now send 'exit' message to all our processes
        for _ in range(num_processes):
            q.put('exit')

        # join them
        for p in ps:
            p.join()

    # if we have set DEV, we need to dump this to a json
    if DEV:
        # normalise the output directory; an empty string means the current
        # working directory (the old `output_dir[-1]` crashed on '')
        if output_dir and not output_dir.endswith('/'):
            output_dir += '/'

        safe_write_fall_off = {}

        # we need to convert all our DEVFallOffEntries to dicts
        for k, v in fall_off.items():
            safe_write_fall_off[k] = v._asdict()

        JSON.save_dict(output_dir + 'fall_off.json', safe_write_fall_off)

    return results