def msgf2seq_file(filepath, fasta_file, msb_psms):
    """Filter a "_best"-format tab file and rewrite it in sequest format.

    The first two rows of ``filepath`` are skipped.  The output file name is
    built from the first column of the third row (text before its first
    '.'), the extension of ``filepath`` and the suffix ``sequestformat``; the
    file is written next to the input.

    Parameters:
        filepath: path of the tab-delimited input file.  Its extension is
            also used as the key into the module-level ``searches`` mapping.
        fasta_file: fasta file handed to ``seqs.prots2genes`` to build the
            protein->gene mapping (and its inverse).
        msb_psms: set of spectid_peptidesequence strings; only rows whose
            key is in this set are converted.

    Returns:
        The path of the file that was written.

    Raises:
        StopIteration: if the input has fewer than three rows.
        KeyError: if the extension of ``filepath`` is not in ``searches``.
    """
    def parse_spec_pep_row(r):
        # get spec_pep from _best file format: the first two '.'-separated
        # pieces of column 0 joined with column 4 by '_'
        return '_'.join(r[0].split('.')[:2] + [r[4]])

    usedir, fin = os.path.split(filepath)

    # Read the file in a single pass: skip the 2 leading lines, peek at the
    # third row for the sample name, then put that row back in front of the
    # remaining ones so it is still converted like every other row.
    in_gen = ut.load_tab_file(filepath)
    next(in_gen)
    next(in_gen)
    first_row = next(in_gen)
    sample = first_row[0].split('.')[0]
    rows = it.chain([first_row], in_gen)

    p2g = seqs.prots2genes(fasta_file)
    g2p = ut.dict_inverse(p2g)
    fout = os.path.join(
        usedir, '.'.join([sample, fin.split('.')[-1], 'sequestformat']))
    search = searches[filepath.split('.')[-1]]

    # Single pre-formatted argument: prints the same text under Python 2
    # and Python 3.
    print("Converting/filtering; Search: %s" % (search,))
    output = (msgfbest2sequest_line(r, p2g, g2p, search)
              for r in rows if parse_spec_pep_row(r) in msb_psms)
    print("Writing %s" % (fout,))
    ut.write_tab_file(output, fout)
    return fout
def ensg_to_ensp_and_park(
        ppips,
        fasta_file='/Users/blakeweb/Dropbox/complex/data/sequences/canon/Hs.fasta',
        park_ids_file='./orth_similarities/table.Hsapiens/Hsapiens_id.txt'):
    """Translate gene ids into the ids found in column 0 of the id table.

    Every gene in ``ppips`` is first mapped to a protein id through the
    inverse of ``seqs.prots2genes(fasta_file)``.  Proteins present in
    column 1 of the id table are translated directly; the remaining ones are
    mapped back to their gene id and translated through column 2 when
    possible.  Ids that match neither column are dropped.

    Parameters:
        ppips: iterable of gene ids; each must be a key of the inverted
            protein->gene mapping.
        fasta_file: fasta file used to build the protein->gene mapping.
            Defaults to the location that used to be hard-coded.
        park_ids_file: table loaded with ``ut.load_lol``; rows are read as
            (target id, protein id, gene id) in columns 0, 1, 2.  Defaults
            to the location that used to be hard-coded.

    Returns:
        A list with the protein-matched translations first (input order),
        followed by the gene-matched translations (input order).

    Raises:
        KeyError: if a gene in ``ppips`` is missing from the inverted
            mapping.
    """
    dhpg = seqs.prots2genes(fasta_file)
    dhgp = ut.dict_inverse(dhpg)
    parkids = ut.load_lol(park_ids_file)

    # Lookup tables keyed by gene id (column 2) and protein id (column 1).
    # As with dict([...]), a later row wins when a key repeats.
    dg2park = {row[2]: row[0] for row in parkids}
    dp2park = {row[1]: row[0] for row in parkids}

    ppips_ensp = [dhgp[g] for g in ppips]

    # First choice: translate through the protein id.
    park_ppips_most = [dp2park[p] for p in ppips_ensp if p in dp2park]

    # Fallback: go back to the gene id for proteins the table does not know.
    ppips_ensg_rest = [dhpg[p] for p in ppips_ensp if p not in dp2park]
    park_ppips_rest = [dg2park[g] for g in ppips_ensg_rest if g in dg2park]

    return park_ppips_most + park_ppips_rest
def check(fasta, protq, do_convert):
    """Report how many ids of an elution file are proteins / genes of a fasta.

    Prints the number of ids in ``el.load_elution(protq).prots`` that are
    keys of the protein->gene mapping built from ``fasta`` and the number
    that are keys of its inverse.  When ``do_convert`` is true and not every
    id was found among the genes, ``seqs.elut_p2g(protq, p2g)`` is called.

    Parameters:
        fasta: fasta file handed to ``seqs.prots2genes``.
        protq: elution file to inspect (and possibly convert).
        do_convert: whether to run the protein->gene conversion when some
            ids are not gene ids.

    Returns:
        None.
    """
    p2g = seqs.prots2genes(fasta)
    g2p = ut.dict_inverse(p2g)
    fprots = el.load_elution(protq).prots

    # Each print gets one pre-formatted string so the output is identical
    # under Python 2 and Python 3.
    print("checking %s" % (ut.shortname(protq),))
    nprotsfound = sum(1 for p in fprots if p in p2g)
    print("proteins: %s of %s" % (nprotsfound, len(fprots)))
    ngenesfound = sum(1 for p in fprots if p in g2p)
    print("genes: %s of %s" % (ngenesfound, len(fprots)))

    if do_convert and ngenesfound < len(fprots):
        print("converting prots to genes: %s" % (protq,))
        seqs.elut_p2g(protq, p2g)
def _select_k(options, prices, budget):
    """Pick the model parameter ``k`` for one rebalancing step.

    When ``options.qbits_limit`` is not 'true' (case-insensitive) the value
    ``options.k`` is used.  Otherwise ``model_qbits(prices, j, budget)`` is
    evaluated for every ``j`` in ``range(1, options.max_k)`` and the largest
    ``j`` whose qubit count equals ``options.max_qbits`` is returned.  If no
    model matches, a warning is issued and ``options.max_k`` (limit above
    every model) or ``options.k`` (otherwise) is returned.
    """
    if options.qbits_limit.lower() != 'true':
        return copy(options.k)

    qbits_per_k = {
        j: model_qbits(prices, j, budget)
        for j in range(1, options.max_k)
    }
    # presumably dict_inverse maps each qubit count to the collection of k
    # values producing it; max() keeps the largest such k.
    qbits_dict = {k0: max(l0) for k0, l0 in dict_inverse(qbits_per_k).items()}

    if options.max_qbits in qbits_dict:
        return qbits_dict[options.max_qbits]
    if options.max_qbits > max(qbits_dict):
        warn(
            "Number of qbits provided exceeds qbits dictionary. Initialising k as the maximum given."
        )
        return copy(options.max_k)
    warn(
        'Number of qbits given not found among possible models.Choosing k equal to given value'
    )
    return copy(options.k)


def main(options):
    """Build the quantum, optimum and real (IFV) ETF histories and save them.

    For every month from 2020-01-01 up to (not including) 2021-01-01 the
    portfolio model returned by ``qcmodel`` is solved twice: once through
    ``aggregator('optimizer', ...)`` (the "quantum" ETF) and once with
    ``mdl.solve()`` (the "optimum" ETF).  The IFV ticker is tracked as the
    real ETF: as many shares as the initial budget allows are bought at the
    first step and then held.  Results are written at every step to
    ``quantum_etf_results.json``, ``opt_etf_results.json`` and
    ``real_etf_results.json`` inside ``options.savepath`` and finally passed
    to ``print_etfs``.

    Parameters:
        options: object providing ``initial_point`` and ``qbits_limit``
            ('true'/'false' strings, case-insensitive), ``budget``, ``k``,
            ``max_k``, ``max_qbits``, ``q``, ``n_trials`` and ``savepath``.

    Returns:
        None.

    Raises:
        Exception: if ``initial_point`` is 'true' while ``qbits_limit`` is
            not, since the parameter vector of one step could not be reused
            for a model with a different number of qubits.
    """
    if (options.initial_point.lower() == 'true'
            and options.qbits_limit.lower() != 'true'):
        raise Exception(
            "Can't use initial_point if the number of required qubits varies from one optimization step to the following. Set also qbits_limit to true."
        )

    # fixed values
    stock_tickers = ['FXD', 'FXR', 'FXL', 'FTXR', 'QTEC']
    start_date = date(2017, 1, 1)
    computation_date = date(2020, 1, 1)
    end_date = date(2021, 1, 1)

    # hyperparameters
    optim_dict = {
        "quantum_instance": 'qasm_simulator',
        "shots": 1024,
        "print": True,
        "logfile": True,
        "solver": 'vqe',
        "optimizer": SLSQP,
        "maxiter": 1000,
        "depth": 1,
        "alpha": 0.35
    }

    # etf collector
    etfs = {}

    # building the quantum etf
    quantum_etf = {}
    date_ = copy(computation_date)
    while date_ < end_date:
        key = date_.strftime('%Y-%m-%d')

        # generate main values for the model
        prices_, mu_, sigma_ = generate_values(
            stock_tickers=stock_tickers + ['IFV'],
            start_date=start_date,
            end_date=date_)

        # values for quantum etf (the last entry belongs to IFV)
        prices = prices_[:-1]
        mu = mu_[:-1]
        sigma = sigma_[:-1, :-1]

        # find budget, best allocation, results
        if len(quantum_etf) == 0:
            B = copy(options.budget)
        else:
            previous_month_etf = copy(
                quantum_etf[previous_month(date_).strftime('%Y-%m-%d')])
            # budget = cash left over + previous holdings at current prices
            B = previous_month_etf['liquidity'] + np.sum(
                np.array(previous_month_etf['allocation']) * prices)
            # NOTE(review): kept inside this branch because
            # last_optimal_params only exists once a first optimisation has
            # run; confirm against the original layout.
            if options.initial_point.lower() == 'true':
                print(
                    "Using best parameters of previous optimization as an initial point"
                )
                optim_dict['initial_point'] = last_optimal_params

        k_ = _select_k(options, prices, B)

        mdl, grouping = qcmodel(prices, k_, B, mu, sigma, options.q)
        optim_dict["docplex_mod"] = mdl

        # retry until a feasible result is found or the trials run out; the
        # last result is used either way
        for _ in range(options.n_trials):
            results = aggregator('optimizer', optim_dict)
            if results['is_qp_feasible']:
                break

        # Integer results (amount of groups of stocks): x
        x_val = [
            results['result'].variables_dict[f'x{i}']
            for i in range(len(prices))
        ]

        # Amount of individual stock
        best_allocation = grouping * np.array(x_val)
        budget_spent = np.sum(best_allocation * prices)

        # Saves optimal parameters as initial point for next iteration
        last_optimal_params = results['solver_info']['optimal_params']

        tmp_results = {
            'computational_time': float(results['computational_time']),
            'optimal_function_value': float(results['result'].fval),
            'status': str(results['result'].status).split('.')[-1],
            'is_qp_feasible': results['is_qp_feasible']
        }

        # generate etf datapoint
        if budget_spent > B:
            # the solution overspends: record it, but do not trade on it
            tmp_results['wrong_results'] = {
                'best_allocation_found': [int(i) for i in best_allocation],
                'budget_spent': float(budget_spent)
            }
            if len(quantum_etf) == 0:
                # at step 0, etf does not spend any budget
                quantum_etf[key] = {
                    'allocation': [0] * len(prices),
                    'prices': [float(p) for p in prices],
                    'liquidity': B,
                    'portfolio_value': 0.,
                    'results': tmp_results,
                    'k': k_
                }
            else:
                # at any later step, etf keeps previous allocation and
                # updates its values with current prices
                quantum_etf[key] = {
                    'allocation': previous_month_etf['allocation'],
                    'prices': [float(p) for p in prices],
                    'liquidity': previous_month_etf['liquidity'],
                    'portfolio_value': float(
                        np.sum(
                            np.array(previous_month_etf['allocation']) *
                            prices)),
                    'results': tmp_results,
                    'k': k_
                }
        else:
            # budget spent <= B
            tmp_results['wrong_results'] = None
            quantum_etf[key] = {
                'allocation': [int(i) for i in best_allocation],
                'prices': [float(p) for p in prices],
                'liquidity': float(B - budget_spent),
                'portfolio_value': float(budget_spent),
                'results': copy(tmp_results),
                'k': k_
            }

        # etf is saved at every step
        os.makedirs(options.savepath, exist_ok=True)
        fs = os.path.join(options.savepath, 'quantum_etf_results.json')
        with open(fs, 'wt') as qf:
            json.dump(quantum_etf, qf)

        # next datapoint
        date_ = next_month(date_)

    # building the optimum and real etf
    # steps separated for clarity only
    opt_etf = {}
    real_etf = {}
    date_ = copy(computation_date)
    while date_ < end_date:
        key = date_.strftime('%Y-%m-%d')

        # generate main values for the model
        prices_, mu_, sigma_ = generate_values(
            stock_tickers=stock_tickers + ['IFV'],
            start_date=start_date,
            end_date=date_)

        # values for optimum etf
        prices = prices_[:-1]
        mu = mu_[:-1]
        sigma = sigma_[:-1, :-1]

        # price of real etf
        ifv_price = prices_[-1]

        # find budget, best allocation, results
        if len(opt_etf) == 0:
            B = copy(options.budget)
            # real values: buy once at the first step, then hold
            no_real_bought_stocks = floor(B / ifv_price)
            real_liquidity = B - no_real_bought_stocks * ifv_price
        else:
            previous_month_etf = copy(
                opt_etf[previous_month(date_).strftime('%Y-%m-%d')])
            B = previous_month_etf['liquidity'] + np.sum(
                np.array(previous_month_etf['allocation']) * prices)

        # Same case-insensitive selection as the quantum loop, so both ETFs
        # are always built with the same k.
        k_ = _select_k(options, prices, B)

        mdl, grouping = qcmodel(prices, k_, B, mu, sigma, options.q)

        # solver
        mdl.solve()
        # start from all zeros and overwrite with the values the solution
        # reports
        sols_dict = dict(
            zip([f'x{i}' for i in range(len(prices))], [0] * len(prices)))
        for j, v in mdl.solution.as_name_dict().items():
            sols_dict[j] = int(v)
        x_val = list(sols_dict.values())

        # Amount of individual stock
        best_allocation = grouping * np.array(x_val)
        budget_spent = np.sum(best_allocation * prices)

        # generate etf datapoint
        if budget_spent > B:
            if len(opt_etf) == 0:
                # at step 0, etf does not spend any budget
                opt_etf[key] = {
                    'allocation': [0] * len(prices),
                    'prices': [float(p) for p in prices],
                    'liquidity': B,
                    'portfolio_value': 0.,
                    'objective_value': mdl.solution.get_objective_value()
                }
            else:
                # at any later step, etf keeps previous allocation and
                # updates its values with current prices
                opt_etf[key] = {
                    'allocation': previous_month_etf['allocation'],
                    'prices': [float(p) for p in prices],
                    'liquidity': previous_month_etf['liquidity'],
                    'portfolio_value': float(
                        np.sum(
                            np.array(previous_month_etf['allocation']) *
                            prices)),
                    'objective_value': mdl.solution.get_objective_value()
                }
        else:
            # budget spent <= B
            opt_etf[key] = {
                'allocation': [int(i) for i in best_allocation],
                'prices': [float(p) for p in prices],
                'liquidity': float(B - budget_spent),
                'portfolio_value': float(budget_spent),
                'objective_value': mdl.solution.get_objective_value()
            }

        # real etf; values are cast to float like every other stored value
        # so the JSON encoder never sees a non-builtin scalar
        real_etf[key] = {
            'liquidity': float(real_liquidity),
            'prices': float(ifv_price),
            'portfolio_value': float(no_real_bought_stocks * ifv_price)
        }

        # etf is saved at every step
        fs_real = os.path.join(options.savepath, 'real_etf_results.json')
        with open(fs_real, 'wt') as rf:
            json.dump(real_etf, rf)
        fs_opt = os.path.join(options.savepath, 'opt_etf_results.json')
        with open(fs_opt, 'wt') as of:
            json.dump(opt_etf, of)

        # next datapoint
        date_ = next_month(date_)

    # clearing notebook output
    # NOTE(review): the flattened source does not show whether this call was
    # inside the loop above; it is placed after it here - confirm.
    clear_output()

    etfs['quantum'] = copy(quantum_etf)
    etfs['optimum'] = copy(opt_etf)
    etfs['IFV'] = copy(real_etf)
    print_etfs(etfs, savepath=options.savepath)
    return None