Beispiel #1
0
def get_expression_profile(expression_level, genes, expression_bins,
                           input_format, output_format, species, tmp,
                           symmetric_expression):
    """Discretize expression levels and return them with their gene ids.

    Genes whose expression level is missing are dropped, and the remaining
    genes are ordered by ascending expression level before binning, so the
    returned values follow that sorted order, not the input order.

    Args:
        expression_level: Per-gene expression values, parallel to ``genes``.
        genes: Gene identifiers; everything from the first ``'.'`` onwards
            is stripped (presumably a version suffix -- confirm with the
            callers).
        expression_bins: Total number of bins handed to ``MI.discretize``.
        input_format: Accession format of ``genes``.
        output_format: Accession format wanted in the result. Conversion
            only happens when both formats are truthy and differ.
        species: Forwarded unchanged to ``change_accessions``.
        tmp: Forwarded unchanged to ``change_accessions``.
        symmetric_expression: When truthy, negative and non-negative levels
            are discretized separately: negatives get
            ``expression_bins // 2`` bins and non-negatives get the rest.

    Returns:
        A tuple ``(expression_profile, genes)``: the array of bin labels and
        the list of gene identifiers in the same order. After an accession
        conversion, genes mapped to ``'-'`` are removed and repeated
        identifiers are collapsed to one entry holding the bin of their
        last occurrence.
    """
    table = pd.DataFrame({'genes': genes,
                          'expression_level': expression_level})
    table = table.dropna(subset=['expression_level'])
    table = table.sort_values(by='expression_level')
    levels = np.array(table['expression_level'])

    if not symmetric_expression:
        profile = MI.discretize(levels, expression_bins)
    else:
        # An odd bin count gives its spare bin to the non-negative side.
        negative_bins, spare_bin = divmod(expression_bins, 2)
        negative_part = MI.discretize(levels[levels < 0], negative_bins)
        non_negative_part = MI.discretize(levels[levels >= 0],
                                          negative_bins + spare_bin)
        # Shift the non-negative labels past the ones used by the negative
        # side (assumes MI.discretize returns zero-based labels -- TODO
        # confirm). Levels are sorted, so negatives precede non-negatives
        # and plain concatenation keeps labels aligned with the rows.
        non_negative_part += negative_bins
        profile = np.concatenate((negative_part, non_negative_part))

    gene_ids = [gene_id.split('.', 1)[0] for gene_id in table['genes']]

    conversion_needed = (input_format and output_format
                         and input_format != output_format)
    if not conversion_needed:
        return profile, gene_ids

    converted = change_accessions(gene_ids, input_format, output_format,
                                  species, tmp)
    # Going through a dict merges duplicated accessions (last bin wins);
    # '-' entries are discarded.
    bin_by_gene = dict(zip(converted, profile))
    retained = [(gene_id, label) for gene_id, label in bin_by_gene.items()
                if gene_id != '-']
    profile = np.array([label for _, label in retained])
    gene_ids = [gene_id for gene_id, _ in retained]
    return profile, gene_ids