Example #1
def _parse_mode(mode):
    options = ['r', 'w', 'tb', (False, True),
               'sdmp', (SOCK_STREAM, SOCK_DGRAM, SOCK_RDM, SOCK_SEQPACKET), 
               '@']
    modeset = set(mode)
    if len(modeset) != len(mode):
        raise ValueError("invalid mode: %r" % mode)
    retvals = ['']
    while options:
        optstr = options.pop(0)
        if len(optstr) > 1:
            opt = concat(modeset.intersection(optstr)) or optstr[0]
            if len(opt) > 1:
                raise ValueError("mode can only have one of %r" % optstr)
            optvals = options.pop(0)
            val = optvals[optstr.index(opt)]
        else:
            val = optstr in modeset
            opt = optstr if val else ''
        modeset.discard(opt)
        retvals[0] += opt
        retvals.append(val)
    if modeset:
        raise ValueError("invalid mode options: %r" % concat(modeset))
    return retvals
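None of these excerpts define concat; each project ships its own helper. The call sites in _parse_mode only make sense if concat joins an iterable of strings, so a minimal sketch consistent with this example (the definition below is an assumption, not the project's actual code) is:

def concat(strings):
    # assumed helper: join an iterable of strings into one string,
    # matching calls like concat(modeset.intersection(optstr))
    return ''.join(strings)

print(concat({'b'}))       # 'b'
print(concat(['s', 'd']))  # 'sd'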
Example #2
def get_data_cluster(train_data, test_data, submit_file, sorted_by):
    '''
    Merge train_data and test_data (labelling test_data from submit_file when it
    exists), sort the combined frame by sorted_by, and write Data_Cluster.csv.
    :param train_data: training-set DataFrame
    :param test_data: test-set DataFrame
    :param submit_file: optional submission CSV used to label test_data
    :param sorted_by: column(s) to sort the merged frame by
    :return: None; writes Data_Cluster.csv as a side effect
    '''
    train_data["text"] = train_data.apply(
        lambda x: concat(x["title"], x["text"]), axis=1)
    test_data["text"] = test_data.apply(
        lambda x: concat(x["title"], x["text"]), axis=1)
    train_data["category"] = "Train"
    test_data["category"] = "Test"

    if os.path.exists(os.path.join(data_path, "submit", submit_file)):
        submit = pd.read_csv(os.path.join(data_path, "submit", submit_file),
                             encoding='utf-8')
        test_data = pd.merge(test_data, submit, on='id', how='left')
    else:
        test_data["negative"] = 0
        test_data["key_entity"] = ""

    data = pd.concat((train_data[[
        "id", "text", "category", "negative", "entity", "key_entity"
    ]], test_data[[
        "id", "text", "category", "negative", "entity", "key_entity"
    ]]),
                     axis=0).reset_index(drop=True)

    data.sort_values(by=sorted_by, inplace=True)
    data.to_csv(os.path.join(data_path, "Data_Cluster.csv"),
                encoding='utf-8',
                index=False)
Example #3
    def visit_Output(self, node, frame):
        body = []
        for child in node.nodes:
            try:
                const = child.as_const(frame.eval_ctx)
            except nodes.Impossible:
                body.append(child)
                continue

            try:
                if frame.eval_ctx.autoescape:
                    if hasattr(const, '__html__'):
                        const = const.__html__()
                    else:
                        const = escape(const)
                const = str(const)
            except Exception:
                body.append(child)
                continue

            if body and isinstance(body[-1], list):
                body[-1].append(const)
            else:
                body.append([const])
        # write a format string for the body
        format = []
        arguments = []
        for item in body:
            if isinstance(item, list):
                format.append(concat(item).replace('%', '%%'))
            else:
                format.append('%s')  # item is a dynamic node, not a constant string
                arguments.append(item)
        self.writeline('yield ')
        self.write(repr(concat(format)))
        if arguments:
            self.write(' % (')
            self.indent()
            for argument in arguments:
                self.newline(argument)
                close = 0
                if frame.eval_ctx.autoescape:
                    self.write('escape(')
                    close += 1
                self.visit(argument, frame)
                self.write(')' * close)
            self.outdent()
            if len(arguments) == 1:
                # trailing comma for tuple with len 1
                self.write(',')
            self.writeline(')')
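The net effect of visit_Output is generated code that yields a single printf-style format string, with the dynamic parts evaluated and spliced in as a tuple. A standalone illustration of that shape (html.escape stands in for the engine's escape, and the template text is made up):

from html import escape

name = '<World>'
# constant chunks fold into one format string; dynamic parts are
# escaped and substituted via the % operator, as in the generated code
print('Hello %s!' % (escape(name),))  # Hello &lt;World&gt;!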
Example #4
def plot_mono_vs_di_likelihood(ll_dict = None):
    if ll_dict is None:
        ll_dict = likelihood_dict()
    normed_dict = {tf: tuple(map(lambda x: x / float(len(getattr(Escherichia_coli, tf)) *
                                                     len(getattr(Escherichia_coli, tf)[0])),
                                 (mono, di)))
                   for (tf, (mono, di)) in ll_dict.items()}
    plt.scatter(*transpose(ll_dict.values()))
    for (tf,(mono,di)) in ll_dict.items():
        sites = getattr(Escherichia_coli,tf)
        text = "%s\n#:%s\nw:%s\nIC:%1.2f" % (tf,len(sites),len(sites[0]),motif_ic(sites))
        plt.annotate(text,(mono,di))
    min_val = min(concat(ll_dict.values()))
    max_val = max(concat(ll_dict.values()))
    plt.xlabel("Mono LL")
    plt.ylabel("Di LL")
    plt.plot([min_val,max_val],[min_val,max_val],linestyle="--")
Example #5
def get_train_and_validate_data(train_file, validate_file, negative_ratio=None):
	df_train = pd.read_csv(train_file)

	if negative_ratio:
		positive = df_train[df_train.label == 1]
		negative = df_train[df_train.label == 0].sample(positive.shape[0] * negative_ratio)
		df_train = utils.concat([positive, negative]).sample(frac=1).reset_index(drop=True)

	df_train = utils.preprocess_none_order_data(df_train)
	X_train = df_train.drop(['label'], axis=1)
	y_train = df_train['label']

	del df_train
	gc.collect()

	df_validate = pd.read_csv(validate_file)
	Y_validate = df_validate[[UID, 'label']]

	df_validate = utils.preprocess_none_order_data(df_validate)
	X_validate = df_validate.drop(['label'], axis=1)

	del df_validate
	gc.collect()

	print 'train columns', len(X_train.columns)
	return X_train, y_train, X_validate, Y_validate
Example #6
 def parse_primary(self):
     token = self.token_stream.current
     lineno = token.lineno
     if token.value in ('True', 'False'):
         next(self.token_stream)
         return nodes.Const(token.value == 'True', lineno=lineno)
     elif token.type is tokens.INTEGER:
         next(self.token_stream)
         return nodes.Const(int(token.value), lineno=lineno)
     elif token.type is tokens.FLOAT:
         next(self.token_stream)
         return nodes.Const(float(token.value), lineno=lineno)
     elif token.type is tokens.STRING:
         buffer = [next(self.token_stream).value]
         while self.token_stream.current.type is tokens.STRING:
             buffer.append(next(self.token_stream).value)
         return nodes.Const(concat(buffer), lineno=lineno)
     elif token.type is tokens.NAME:
         next(self.token_stream)
         return nodes.Name(token.value, 'load', lineno=lineno)
     elif token.type is tokens.LPAREN:
         next(self.token_stream)
         node = self.parse_tuple(explicit_parens=True)
         self.token_stream.expect(tokens.RPAREN)
     elif token.type is tokens.LBRACKET:
         node = self.parse_list()
     elif token.type is tokens.LBRACE:
         node = self.parse_dict()
     else:
         self.fail('unexpected character %r' % token.value, lineno)
     return node
Example #7
 def render(self, *args, **kwargs):
     vars = dict(*args, **kwargs)
     ctx = self.new_context(vars)
     return concat(self.root_render_func(ctx))
Example #8
def get_entity_data(data):
    data = data[data["negative"] == 1]
    data["text"] = data.apply(lambda x: concat(x['title'], x['text']), axis=1)

    id = []
    text = []
    entities = []
    label = []
    for i in range(len(data)):
        entity = data["entity"].iloc[i].split(';')
        try:
            key_entity = data["key_entity"].iloc[i].split(';')
        except Exception as error:
            key_entity = []
        for e in entity:
            if e == "":
                continue
            id.append(data["id"].iloc[i])
            text.append(data["text"].iloc[i])
            entities.append(e)
            if e in key_entity:
                label.append(1)
            else:
                label.append(0)
    entity_data = pd.DataFrame({
        "id": id,
        "text": text,
        "entity": entities,
        "label": label
    })

    return entity_data
Example #9
 def __init__(self, domain_name, *items, relations=None):
     relations = relations or []
     self.name = domain_name
     self.items = list(set(list(items) + concat(relations)))
     self.relations = normalize_relations((relations) + [
         (BOTTOM, item) for item in self.items
     ])
Example #10
def get_deckHanzi(config, session, DeckSeen):
    # Get Hanzi from the database. This function has been carefully tuned to try and get good
    # performance, so be careful before you modify it! In particular:
    #  1) Doing *everything* in one huge query didn't work so well - perhaps sqlite's execution
    #     is not so good?
    #  2) It's essential to trim the amount of text you look at by only looking at fields with names
    #     like the ones we know about and suspect will contain Hanzi
    
    # Find all card models which come from Mandarin models
    cardmodels = session.all("select cardModels.id, cardModels.modelId, cardModels.qformat from cardModels, models where cardModels.modelId = models.id AND models.tags LIKE %s" % utils.toSqlLiteral("%" + config.modelTag + "%"))
    # Find the field names that are included in the *question field* of such cards
    cardmodelsfieldsnames = [(cmid, modelid, set([res["mappingkey"] for res in utils.parseFormatString(qformat) if isinstance(res, dict)])) for cmid, modelid, qformat in cardmodels]
    # Filter out unpromising names, and turn the remainder into the IDs of field models
    eligiblefields = set(utils.concat([config.candidateFieldNamesByKey[key] for key in ['expression', 'mw', 'trad', 'simp']]))
    cardmodelsfields = [(cmid, [session.scalar("select fieldModels.id from fieldModels where fieldModels.name = :name and fieldModels.modelId = :mid", name=fmname, mid=modelid) for fmname in fmnames if fmname in eligiblefields]) for cmid, modelid, fmnames in cardmodelsfieldsnames]
    
    # Look up the contents of fields whose Ids we found in the previous step, optionally only including
    # those whose corresponding card has been seen at least once
    hanziss = session.column0("SELECT fields.value FROM cards, fields WHERE cards.factId = fields.factId %s AND (%s)" % \
                              ((DeckSeen == 0) and "AND cards.reps > 0" or "", # Only look for seen cards if we are in that mode
                               " OR ".join(["(cards.cardModelId = %s AND fields.fieldModelID IN %s)" % (utils.toSqlLiteral(cmid), utils.toSqlLiteral(fmids)) for cmid, fmids in cardmodelsfields])))
    
    # Flatten everything into a set with *no intermediate structures*
    allhanzis = set()
    for hanzis in hanziss:
        allhanzis.update([c for c in hanzis if utils.isHanzi(c)])
    
    return allhanzis
Example #11
    def predict_cases(self, n_steps, ibge_id, debug=False):
        def printVertexes(step, fileOut, vertexes):
            for vertex in vertexes:
                fileOut.write(
                    concat(
                        [step, vertex['id'], vertex['name'], vertex['value']],
                        ','))

        if debug:
            fileOut = open(
                'logs/' + datetime.now().strftime("%Y-%m-%d_%H:%M:%S") +
                '.csv', 'w')
            fileOut.write(concat(['step', 'ibge', 'name', 'newCases'], ','))

        cumulative_cases_city = []

        sum = 0
        for i in range(n_steps):

            sum += self.graph.vs[self.dict_ibge_index[ibge_id]]['value']
            cumulative_cases_city.append(sum)

            if debug:
                printVertexes(i, fileOut, self.graph.vs)

            self.autoUpdateCases(i + 1, ibge_id)

        if debug:
            fileOut.close()

        return cumulative_cases_city
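Here concat takes a second argument, so in this project it must behave like a separator join. A plausible stand-in (assumed, not the project's code):

def concat(items, sep=''):
    # assumed variant: stringify each item and join with sep
    return sep.join(str(item) for item in items)

print(concat(['step', 'ibge', 'name', 'newCases'], ','))  # step,ibge,name,newCases

Note that printVertexes never writes a newline after each row, so unless the real concat appends one, the rows of the debug CSV will run together.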
Example #12
def get_fileHanzi(file):
    try:
        f = codecs.open(file, "r", "utf8")
        return set(utils.concat([[c for c in line if utils.isHanzi(c)] for line in f.readlines()]))
    except IOError, e:
        log.exception("Error reading hanzi statistics character file " + file)
        return set()
Example #13
    def generate(self):
        result_pattern = ""
        if self.__option == 'LETTER':
            up, down, left, right = (True, True, True,
                                     True) if self.__framed else (False, False,
                                                                  False, False)
            if self.__direction == 'VERTICAL':
                for i, ch in enumerate(self.__pattern):
                    up = True if i == 0 and self.__framed else False
                    result_pattern += PatternGenerator(ch, self.__option, self.__style, self.__size)\
                        .generate(up, down, left, right)
            elif self.__direction == 'HORIZONTAL':
                for i, ch in enumerate(self.__pattern):
                    left = True if i == 0 and self.__framed else False
                    result_pattern = utils.concat(
                        result_pattern,
                        PatternGenerator(ch, self.__option, self.__style,
                                         self.__size).generate(
                                             up, down, left, right))
            else:
                raise NotImplementedError(
                    'Direction only has vertical and horizontal options')
        else:
            raise NotImplementedError(
                'Only letter option is available, right now')

        return result_pattern
Example #14
def make_correlation_structure_by_length():
    q = fdr(concat(euk_tests))
    plt.close() # get rid of output from cluster_motif
    lens = map(len, euk_motifs)
    jss = [indices_where(lens, lambda x:10**i <= x < 10**(i+1)) for i in range(1, 4+1)]
    for i,js in tqdm(enumerate(jss)):
        analyze_mi_tests2(rslice(euk_tests, js), rslice(euk_motifs, js), label=str("10**%s" % (i+1)), q=q)
Example #15
 def get_relations(self):
     return concat([
         [
             self._bind_domain(arg, relation)
             for relation in arg.get_relations()
         ]
         for arg in self.args
     ])
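Most of the list-based examples in this collection instead treat concat as "flatten one level". A minimal sketch of that reading (again an assumption, not any project's actual helper):

def concat(xss):
    # assumed helper: flatten a list of iterables by one level
    return [x for xs in xss for x in xs]

print(concat([[1, 2], [3], []]))  # [1, 2, 3]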
Example #16
def analyze_mi_tests2(tests, motifs, q=None, label=None):
    if q is None:
        q = fdr(concat(tests))
    correlated_percentage = count(lambda x:x <= q,(concat(tests)))/float(len(concat(tests)))
    ds = [[j - i for (i, coli), (j,colj) in choose2(list(enumerate(transpose(motif))))]
               for motif in motifs]
    def binom_ci(xs):
        """return width of error bar"""
        bs_means = sorted([mean(bs(xs)) for x in range(1000)])
        mu = mean(xs)
        return (mu - bs_means[25], bs_means[975] - mu)
    tests_by_dist = [[t <= q for t,d in zip(concat(tests), concat(ds)) if d == i] for i in range(1, 20)]
    mean_vals = map(lambda xs:mean(xs) if xs else 0, tests_by_dist)
    cis = map(lambda xs:binom_ci(xs) if xs else (0,0), tests_by_dist)
    plt.errorbar(range(1,20),
                 mean_vals,yerr=transpose(cis),label=label,capthick=1)
    plt.xlabel("Distance (bp)",fontsize="large")
    plt.ylabel("Proportion of Significant Correlations",fontsize="large")
    plt.legend()
Example #17
def cell_likelihood(reads,ps):
    points = sorted(concat(reads))
    G = len(ps)
    if 0 not in points:
        points.insert(0, 0)  # keep the boundary list sorted
    if G not in points:
        points.append(G)
    return product([product(1-p for p in ps[start:stop]) for (start,stop) in reads])
Example #18
def get_fileHanzi(file):
    try:
        f = codecs.open(file, "r", "utf8")
        return set(
            utils.concat([[c for c in line if utils.isHanzi(c)]
                          for line in f.readlines()]))
    except IOError, e:
        log.exception("Error reading hanzi statistics character file " + file)
        return set()
Example #19
def discrete_parallelogram_plot(filename=None):
    motifs = concat([maxent_motifs_with_ic(200,10,ic,10) for ic in tqdm(np.linspace(0.5,19.5,100))])
    ics = map(motif_ic,motifs)
    mis = map(total_motif_mi,motifs)
    plt.scatter(ics,mis)
    plt.xlabel("IC (bits)")
    plt.ylabel("Pairwise MI (bits)")
    plt.title("IC vs Pairwise MI for MaxEnt Motifs")
    maybesave(filename)
Example #20
def make_correlation_structure_by_cluster_figure():
    from motif_clustering import cluster_motif
    q = fdr(concat(euk_tests))
    euk_clusterses = [map(cluster_motif, tqdm(euk_motifs)) for i in range(3)]
    plt.close() # get rid of output from cluster_motif
    mean_lens = map(lambda xs:round(mean(xs)), transpose([map(len,cs) for cs in euk_clusterses]))
    jss = [indices_where(mean_lens, lambda x:x==i) for i in range(1, 5+1)]
    for i,js in tqdm(enumerate(jss)):
        analyze_mi_tests2(rslice(euk_tests, js), rslice(euk_motifs, js), label=str(i+1), q=q)
Example #21
def pop_estimator(obs):
    """Given a vector of observed species counts obs,
    estimate total number of species by bs of shannon entropy"""
    N = float(sum(obs))
    sample = concat([[i for _ in range(v)] for i,v in enumerate(obs)])
    def resample_pop():
        re_obs = Counter(bs(sample)).values()
        return 2**h([v/N for v in re_obs])
    return [resample_pop() for i in range(100)]
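pop_estimator leans on two helpers that never appear in these excerpts. Plausible minimal versions, assuming bs is a bootstrap resample and h is Shannon entropy in bits:

import random
from math import log

def bs(xs):
    # assumed: bootstrap resample, len(xs) draws with replacement
    return [random.choice(xs) for _ in xs]

def h(ps):
    # assumed: Shannon entropy in bits of a probability vector
    return -sum(p * log(p, 2) for p in ps if p > 0)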
Example #22
 def match(self, x):
     input_text = [
         load_text(i, self.max_sentence_len, self.input2idx, self.choice)
         for i in x
     ]
     input_text = np.asarray(input_text)
     res = self.model.predict(input_text)
     res = concat(res)
     res = self.decode(res, True)
     return res
Example #23
def parse_network(fname,connectivity=1):
    """parse network and return graph object"""
    with open(fname) as f:
        raw_lines = [line.strip().split(',') for line in f.readlines()]
    lines = [(source,target,int(sgn)) for (source,sgn,target) in raw_lines]
    all_names = list(set(concat([(source,target) for source,target,sgn in lines])))
    idx_from_name = {name:i for (i,name) in enumerate(all_names)}
    name_from_idx = {i:name for (name,i) in idx_from_name.items()}
    processed_lines = [(idx_from_name[src],idx_from_name[trg],sgn) for (src,trg,sgn) in lines]
    return NetStruct(processed_lines,name_from_idx,connectivity=connectivity)
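From the unpacking order (source, sgn, target), each line of the input file is source,sign,target, with sign 1 for activation (i -> j) and -1 for repression (i -| j), matching the NetStruct docstring below. A made-up example file:

geneA,1,geneB
geneB,-1,geneC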
Example #24
def _make_sockaddr(family, addr):
    family = _ADDRFAMILY_MAP.get(family, '<unknown>')
    if isinstance(addr, tuple):
        acc = []
        for val in addr:
            val = str(val) if val is not None else ''
            if ':' in val:
                val = '[' + val + ']'
            acc.append(val)
        addr = concat(acc, ':')
    return ':{}:{}'.format(family, addr)
Example #25
def analyze_column_frequencies():
    """Do columnwise frequencies reveal stable patterns that could be
explained by amino acid preferences?"""
    def dna_freqs(xs):
        return [xs.count(b)/float(len(xs)) for b in "ACGT"]
    all_freqs = concat([map(dna_freqs,transpose(getattr(tfdf_obj,tf)))
                         for tf in tfdf_obj.tfs])
    for k,(i,j) in enumerate(choose2(range(4))):
        plt.subplot(4,4,k+1)  # subplot indices are 1-based
        cols = transpose(all_freqs)
        plt.scatter(cols[i],cols[j])
Example #26
 def __repr__(self):
     rv = [self.__class__.__name__, '(']
     for idx, k in enumerate(self.fields):
         v = getattr(self, k)
         if isinstance(v, str):
             v = "'{}'".format(v.replace('\n', '\\n'))
         rv.append('{0}={1}'.format(k, v))
         if idx != len(self.fields) - 1:
             rv.append(', ')
     rv.append(')')
     return concat(rv)
Example #27
 def __init__(self,adjacencies,names=None,connectivity=1):
     """adjacencies describes signed directed graph, i.e. edge i -> j
     encoded as (i,j,1), i -| j encoded as (i,j,-1) names is a
     dictionary of the form {i:i_name}.
     """
     self.V = max(concat([[i,j] for (i,j,sgn) in adjacencies])) + 1
     self.adjs = adjacencies
     self.names = names
     self.graph = nx.DiGraph([(src,trg) for (src,trg,sgn) in self.adjs])
     #self.mat = scp.sparse.dok_matrix((self.V,self.V))
     self.mat = np.zeros((self.V,self.V))
     for (src,trg,sgn) in self.adjs:
         self.mat[src,trg] = sgn
Example #28
def main():
    xoffset = 0
    yscale = 'log'
    data_path = 'datasets/data'
    output_path = 'results'
    spls = ['BSN']
    labels = ['Reana', 'ReanaE']

    if len(sys.argv) > 2:
        data_path = sys.argv[1]
        output_path = sys.argv[2]
        spls = sys.argv[3:]
        print(f'data_path: {data_path}')
        print(f'output_path: {output_path}')
        print(f'spls: {spls}')
    try:
        mkdir(output_path)
    except OSError as error:
        pass
    dirs = ['graphs', 'boxplots', 'pairwise-graphs', 'tables', 'tables/effect-size', 'tables/summary']
    for path in dirs:
        try:
            mkdir(f'{output_path}/{path}')
        except OSError as error:
            pass

    # convert data to csv
    rt_data = concat([[f'running_time/totalTime{spl}{label}' for spl in spls] for label in labels])
    mem_data = concat([[f'memory_usage/totalMemory{spl}{label}' for spl in spls] for label in labels])

    for filename in rt_data:
        out_to_csv(f'{data_path}/{filename}.out', f'csv/{filename}.csv')

    for filename in mem_data:
        out_to_csv(f'{data_path}/{filename}.out', f'csv/{filename}.csv')

    for spl in spls:
        plot_spl(spl, labels, xoffset=xoffset, yscale=yscale, output_path=output_path)
        get_pairwise_graphs(spl, labels, xoffset=xoffset, yscale=yscale, output_path=output_path)
Example #29
    def handle(self, *args, **kwargs):
        ano = 2018
        logger.debug('Downloading TSE {ano}...'.format(ano=ano))
        base = 'http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_cand/consulta_cand_{ano}.zip'  # noqa
        url = base.format(ano=ano)
        download = download_file(url)
        df = concat(download)
        logger.debug('total candidates: {shape}'.format(shape=df.shape))

        fetch_parallel(df)

        logger.debug(
            f'candidates: {Candidate.objects.count()} expected: {df.shape[0]}')
Example #30
def get_train_and_validate_data_from_cache(train_file, validate_file, only_validate=False, is_concat=False):
	print 'get_data train_file:{}, validate_file:{}, only_validate: {}, concat: {}'.format(
		train_file, validate_file, only_validate, is_concat)

	dtypes = dict(
		label=np.float32,
	)
	embedings = list(range(32))
	for col in embedings:
		dtypes[col] = np.float32

	useless_cols = [
		'department_product_count',
		'department_order_count',
		'department_order_dow_mean',
		'department_days_since_prior_order_mean',
		'department_add_to_cart_order_mean',
		'department_bought_times',
		'department_reorder_ratio',

		'aisle_add_to_cart_order_mean',
		'aisle_product_count',
		'aisle_days_since_prior_order_mean',
		'aisle_order_hour_of_day_mean',
		'aisle_order_dow_mean',
		'aisle_bought_times',
		'aisle_reorder_ratio',
	]

	if not only_validate or is_concat:
		df_train = pd.read_csv(train_file, compact_ints=True, dtype=dtypes)
		y_train = df_train['label']
		df_train.drop(['label']+useless_cols, axis=1, inplace=True)
	else:
		y_train = None
		df_train = None

	# df_validate, y_validate = None, None
	df_validate = pd.read_csv(validate_file, compact_ints=True, dtype=dtypes)
	y_validate = df_validate['label']
	df_validate.drop([UID, 'label']+useless_cols, axis=1, inplace=True)

	if is_concat:
		df_train = utils.concat([df_train, df_validate])
		df_validate = None
		y_train = y_train.append(y_validate, ignore_index=True)
		y_validate = None
		gc.collect()

	return df_train, y_train, df_validate, y_validate
Example #31
def mean_field_hs(Vs, K):
    """

    Pj(xj) = 1/Z0 *exp(-beta*hj(xj)), where
    hj(xj) = \sum_{<j,jp>} \sum_{xjp \in jp} V(xj,xjp)*Pjp(xjp)

    We assume a Potts model of m variables x0...xj...xm-1 where each
    variable can take on K states 0...i...K-1.  Mean field functions h
    are represented as a matrix hss where each row gives the values
    hj(i).  [Note that i,j are reversed from the usual row-column
    convention.]

    Input is a matrix Vs of pairwise contributions to the hamiltonian
    where Vs[j][jp] is a function V(xj,xjp)
    """
    M = len(Vs)
    jpairs = pairs(range(M))
    hs = [[1 for i in range(K)] for j in range(M)]

    def Pj(xj, j):
        # print xj,j
        return exp(-beta * hs[j][xj]) / sum(exp(-beta * hs[j][xjp]) for xjp in range(K))

    old_hs = matcopy(hs)
    while True:
        for j in range(M):
            for i in range(K):
                hs[j][i] = sum(sum(Vs[j][jp](i, ip) * Pj(ip, jp) for ip in range(K)) for jp in range(j + 1, M)) + sum(
                    sum(Vs[jp][j](ip, i) * Pj(ip, jp) for ip in range(K)) for jp in range(0, j)  # all jp < j
                )
        print l2(concat(hs), concat(old_hs))
        if old_hs == hs:
            break
        else:
            old_hs = matcopy(hs)
            print hs
    return hs
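In standard notation, the self-consistency equations this loop iterates (reconstructed from the docstring) are

    P_j(x_j) = \frac{e^{-\beta h_j(x_j)}}{\sum_{x'} e^{-\beta h_j(x')}},
    \qquad
    h_j(x_j) = \sum_{j' \neq j} \sum_{x_{j'}} V_{j,j'}(x_j, x_{j'}) \, P_{j'}(x_{j'}),

solved by fixed-point iteration until hs stops changing.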
Example #32
 def fetch_row(self, table_name: str, row_number: int, column: int):
     if not self.has_table(table_name):
         return
     result = self._connection.execute(concat("select * from ", table_name))
     rows = result.fetchall()
      if row_number == -1 or row_number >= len(rows):
         row_number = self.row_count(table_name) - 2
     else:
         row_number -= 1
     if column >= self.column_count(table_name):
          raise IndexError("Column index %d out of range %d " %
                          (column, len(result.keys())))
      if column == -1:
         return rows[row_number][:]
     return rows[row_number][column]
Example #33
    def simulate(self, batch, turn=3):
        # for batch in data.train_iter:
        input_message = batch.hist1
        history1 = input_message
        reward = 0
        for t in range(turn):
            agent = self.agent[(t % 2)]
            logits_matrix, decoder_out = agent.generate(
                input_message)  # decoder_out = [data, length]
            reward += 1  # FIXME

            history2 = decoder_out
            input_message = concat(history1, history2)
            history1 = history2
        return reward
Example #34
    def play(self, max_episodes, max_episode_len):

        total_r = 0

        for i in range(max_episodes):

            # reset if terminated
            s = self.env.reset()
            g = self.env.sample_goal()
            self.env.render_goal()
            time.sleep(2)
            self.env.close_goal()

            # concat
            s_concat = concat(s, g)

            # write to monitor
            print('episode ' + str(i) + ' reward ' + str(total_r))
            total_r = 0

            for j in range(max_episode_len):

                # predict action
                a = self.actor.predict_target([s_concat])[0]

                # take action
                s_next, r, d, _ = self.env.step(a)
                self.env.render()
                s_next_concat = concat(s_next, g)
                total_r += r

                s_concat = s_next_concat
                if d:
                    break

            self.env.close()
Example #35
File: hmm.py Project: poneill/amic
def baum_welch(obs,L):
    """Given an observed sequence and binding-site length L, approximate MLE
    parameters for emission probabilities and the transition rate a01
    (background->site) by iterating Viterbi decoding and re-estimation
    (hard EM, rather than full forward-backward Baum-Welch).
    TODO: non-uniform background frequencies"""
    states = range(L+1)
    a01 = random.random()
    start_p = make_start_p(a01)
    trans_p = make_trans_p(a01)
    emit_p = [simplex_sample(4) for state in states]
    hidden_states = [random.choice(states) for ob in obs]
    iterations = 0
    while True:
        # compute hidden states, given probs
        prob,hidden_states_new = viterbi(obs, states, start_p, trans_p, emit_p)
        # compute probs, given hidden states
        # first compute a01
        a01_new = estimate_a01(hidden_states_new)
        start_p_new = make_start_p(a01_new)
        trans_p_new = make_trans_p(a01_new)
        emit_p_new = estimate_emit_p(obs,hidden_states_new,states)
        if (start_p_new == start_p and
            trans_p_new == trans_p and
            emit_p_new == emit_p and
            hidden_states_new == hidden_states):
            break
        else:
            print iterations,a01,l2(start_p,start_p_new),
            print l2(concat(trans_p),concat(trans_p_new)),
            print l2((hidden_states),hidden_states_new)
            a01 = a01_new
            start_p = start_p_new
            trans_p = trans_p_new
            emit_p = emit_p_new
            hidden_states = hidden_states_new
            iterations += 1
    return start_p,trans_p,emit_p,hidden_states
Example #36
def get_results(iy_goals_1, iy_goals_2, ms_goals_1, ms_goals_2, h1=0, h2=0):
    res = {}
    res['mac'] = (ms_goals_1 > ms_goals_2, ms_goals_1 == ms_goals_2,
                  ms_goals_1 < ms_goals_2)
    res['ilk'] = (iy_goals_1 > iy_goals_2, iy_goals_1 == iy_goals_2,
                  iy_goals_1 < iy_goals_2)

    h_goals_1 = ms_goals_1 + h1
    h_goals_2 = ms_goals_2 + h2
    res['han'] = (h_goals_1 > h_goals_2, h_goals_1 == h_goals_2,
                  h_goals_1 < h_goals_2)
    res['kar'] = (ms_goals_1 > 0 and ms_goals_2 > 0, ms_goals_1 == 0
                  or ms_goals_2 == 0)
    res['cif'] = (ms_goals_1 >= ms_goals_2, ms_goals_1 != ms_goals_2,
                  ms_goals_1 <= ms_goals_2)
    res['iy'] = (iy_goals_1 + iy_goals_2 > 1.5, iy_goals_1 + iy_goals_2 < 1.5)

    total = ms_goals_1 + ms_goals_2
    res['au1'] = (total > 1.5, total < 1.5)
    res['au2'] = (total > 2.5, total < 2.5)
    res['au3'] = (total > 3.5, total < 3.5)
    res['top'] = (total < 2, total >= 2 and total < 4, total >= 4
                  and total < 7, total >= 7)

    iy_diff = iy_goals_1 - iy_goals_2
    ms_diff = ms_goals_1 - ms_goals_2
    res['IYMS11'] = iy_diff > 0 and ms_diff > 0
    res['IYMS10'] = iy_diff > 0 and ms_diff == 0
    res['IYMS12'] = iy_diff > 0 and ms_diff < 0
    res['IYMS01'] = iy_diff == 0 and ms_diff > 0
    res['IYMS00'] = iy_diff == 0 and ms_diff == 0
    res['IYMS02'] = iy_diff == 0 and ms_diff < 0
    res['IYMS21'] = iy_diff < 0 and ms_diff > 0
    res['IYMS20'] = iy_diff < 0 and ms_diff == 0
    res['IYMS22'] = iy_diff < 0 and ms_diff < 0

    for i in range(6):
        for j in range(6):
            res['SK%d%d' % (i, j)] = ms_goals_1 == i and ms_goals_2 == j
            if i == 5:
                res['SK%d%d' % (i, j)] = ms_goals_1 > 5 and ms_goals_2 == j
            if j == 5:
                res['SK%d%d' % (i, j)] = ms_goals_1 == i and ms_goals_2 > 5

    res = concat([res[bet] for bet in BET_ORDER] +
                 [[res[bet] for bet in IYMS_ORDER + SK_ORDER]])

    return res
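The final concat flattens a mixed structure: each BET_ORDER entry is a tuple of booleans, while the IYMS and SK markets are single booleans and so are wrapped in one extra list to survive the flatten. A small demonstration with made-up stand-ins:

def concat(xss):
    return [x for xs in xss for x in xs]

bet_tuples = [(True, False), (False, True)]  # stand-ins for res[bet] tuples
singles = [True, False]                      # stand-ins for the boolean markets
print(concat(bet_tuples + [singles]))        # [True, False, False, True, True, False]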
Example #37
def arca_motif_comparison():
    arca_reads = get_arca_reads()
    true_rdm = density_from_reads(arca_reads, G)
    pssm = make_pssm(Escherichia_coli.ArcA)
    plt.plot(true_rdm[0])
    plt.plot(true_rdm[1])
    fwd_scores, rev_scores = score_genome_np(pssm, genome)
    scores = np.log(np.exp(fwd_scores) + np.exp(rev_scores))
    sites = concat([(site, wc(site)) for site in Escherichia_coli.ArcA])
    site_locations = [m.start(0) for site in sites
                      for m in re.finditer(site, genome)]
    site_locations_np = np.zeros(G)
    for site_loc in site_locations:
        site_locations_np[site_loc] = 1
    plt.plot(site_locations_np)
    plt.plot(scores)
Example #38
def verify_checksum():
    computed = options.action
    for arg in args:
        computed = utils.concat(computed, arg)
    computed = hashlib.md5(computed).hexdigest()
    if options.checksum == computed:
        if options.debug > 1:
            print ("Valid helper action checksum. Received: " +
                   options.checksum + " Computed: " + computed)
        return True
    elif options.checksum == "SKIP":
        return True
    else:
        sys.stderr.write("Invalid action checksum! " +
                         "Received: "+str(options.checksum) + " - " +
                         "Expected: "+computed + "\n")
        return False
Example #39
def verify_checksum():
    computed = options.action
    for arg in args:
        computed = utils.concat(computed, arg)
    computed = hashlib.md5(computed).hexdigest()
    if options.checksum == computed:
        if options.debug > 1:
            print("Valid helper action checksum. Received: " +
                  options.checksum + " Computed: " + computed)
        return True
    elif options.checksum == "SKIP":
        return True
    else:
        sys.stderr.write("Invalid action checksum! " + "Received: " +
                         str(options.checksum) + " - " + "Expected: " +
                         computed + "\n")
        return False
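The check is simply MD5 over the action string with each argument appended in order. A standalone equivalent of the computation (this assumes utils.concat is plain string concatenation; under Python 3 the input must also be encoded to bytes):

import hashlib

def compute_checksum(action, args):
    computed = action
    for arg in args:
        computed = computed + arg  # utils.concat assumed to concatenate strings
    return hashlib.md5(computed.encode('utf-8')).hexdigest()

print(compute_checksum('restart', ['svc', 'now']))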
Example #40
def make_ringer(code):
    """minimize eps(site) - mu + (sigma^2)/2"""
    def aa_mu(aa):
        return mean([code[aa, b1, b2] for b1, b2 in nuc_pairs])

    def aa_sigma(aa):
        return sqrt(variance([code[aa, b1, b2] for b1, b2 in nuc_pairs]))

    (aa, b1, b2), min_score = min(code.items(),
                                  key=lambda (
                                      (aa, b1, b2), score): score - aa_mu(aa) +
                                  (aa_sigma(aa)**2) / 2.0)
    bd = [aa] * (L - 1)
    sites = [
        "".join(concat([(b1, b2) for j in range(L / 2)])) for i in range(n)
    ]
    return bd, sites
Example #41
def make_gle_evo_sim_spoofs(bio_motifs, trials_per_motif = 3):
    start_time = time.time()
    spoofs = []
    failures = 0
    for it, motif in enumerate(tqdm(bio_motifs, desc='bio_motifs')):
        bio_ic = motif_ic(motif)
        these_spoofs = [spoof_motif_gle(motif,num_motifs=10, Ne_tol=10**-2)
                        for i in range(trials_per_motif)]
        spoofs.append(these_spoofs)
        spoof_ics = map(motif_ic, concat(these_spoofs))
        lb, ub = mean_ci(spoof_ics)
        out_of_bounds = (not (lb <= bio_ic <= ub))
        failures += out_of_bounds
        fail_perc = failures/float(it+1)
        print it,"bio_ic:", bio_ic, "spoof_ci: (%s,%s)" % (lb, ub), "*" * out_of_bounds,"failures:","%1.2f" % fail_perc
    stop_time = time.time()
    print "total time:", stop_time  - start_time
    return spoofs
Example #42
    def __init__(self, md, weekid): # md is match_data
        self.weekID = weekid
        self.matchID = int(md[10])
        self.detailID = int(md[0])
        self.datetime = datetime.strptime(md[7] + " " + md[6], '%d.%m.%Y %H:%M')
        self.league = md[26]
        self.team_1 = md[1]
        self.team_2 = md[3]
        self.mbs = parse_int(md[13])
        self.iy_goals_1 = parse_int(md[11])
        self.iy_goals_2 = parse_int(md[12])
        if self.iy_goals_1 is None or self.iy_goals_2 is None:
            self.iy_goals_1 = None
            self.iy_goals_2 = None
            self.ms_goals_1 = None
            self.ms_goals_2 = None
        else:
            self.ms_goals_1 = parse_int(md[8])
            self.ms_goals_2 = parse_int(md[9])

        self.was_played = self.ms_goals_1 is not None
        self.h1 = 0 if md[14] == '' else int(md[14])
        self.h2 = 0 if md[15] == '' else int(md[15])
        self.ratios = []
        res = {}
        res['mac'] = [parse_float(x) for x in md[16:19]]
        res['ilk'] = [parse_float(x) for x in md[33:36]]
        res['han'] = [parse_float(x) for x in md[36:39]]
        res['kar'] = [parse_float(x) for x in md[39:41]]
        res['cif'] = [parse_float(x) for x in md[19:22]]
        res['iy'] = [parse_float(x) for x in md[42:44]]
        res['au1'] = [parse_float(x) for x in md[44:46]]
        res['au2'] = [parse_float(x) for x in md[22:24]]
        res['au3'] = [parse_float(x) for x in md[46:48]]
        res['top'] = [parse_float(x) for x in md[29:33]]
        
        if self.was_played:
            self.results = get_results(self.iy_goals_1, self.iy_goals_2,
                            self.ms_goals_1, self.ms_goals_2, self.h1, self.h2)

        self.ratios = concat([res[bet] for bet in BET_ORDER])
        if self.league != 'DUEL':
            self.fetch_details()
Example #43
def sanity_check():
    G = 10000
    config = [G/2]
    mfl = 250
    lamb = 1.0/mfl
    num_frags = 10000
    frags = concat([chip(G,config,mfl) for i in xrange(num_frags)])
    min_seq_length = 75
    sequenced_frags = filter(lambda (start,stop):stop - start > min_seq_length,frags)
    fd_frags,bk_frags = separate(lambda x:random.random() < 0.5,sequenced_frags)
    fd_reads = [('+',start,start+min_seq_length) for (start,stop) in fd_frags]
    bk_reads = [('-',stop-min_seq_length,stop) for (start,stop) in bk_frags]
    reads = fd_reads + bk_reads
    inferred_frags = exp_reconstruction(reads,lamb,G)
    plot_reads(reads,G=G)
    plt.plot(frag_density(frags,G=G),label="all frags")
    plt.plot(frag_density(sequenced_frags,G=G),label="seq frags")
    plt.plot((inferred_frags),label="inferred frags")
    plt.legend()
Example #44
def get_results(iy_goals_1, iy_goals_2, ms_goals_1, ms_goals_2, h1=0, h2=0):
    res = {}
    res['mac'] = (ms_goals_1 > ms_goals_2, ms_goals_1 == ms_goals_2, ms_goals_1 < ms_goals_2)
    res['ilk'] = (iy_goals_1 > iy_goals_2, iy_goals_1 == iy_goals_2, iy_goals_1 < iy_goals_2)

    h_goals_1 = ms_goals_1 + h1
    h_goals_2 = ms_goals_2 + h2
    res['han'] = (h_goals_1 > h_goals_2, h_goals_1 == h_goals_2, h_goals_1 < h_goals_2)
    res['kar'] = (ms_goals_1 > 0 and ms_goals_2 > 0, ms_goals_1 == 0 or ms_goals_2 == 0)
    res['cif'] = (ms_goals_1 >= ms_goals_2, ms_goals_1 != ms_goals_2, ms_goals_1 <= ms_goals_2)
    res['iy'] = (iy_goals_1 + iy_goals_2 > 1.5, iy_goals_1 + iy_goals_2 < 1.5)

    total = ms_goals_1 + ms_goals_2
    res['au1'] = (total > 1.5, total < 1.5)
    res['au2'] = (total > 2.5, total < 2.5)
    res['au3'] = (total > 3.5, total < 3.5)
    res['top'] = (total < 2, total >= 2 and total < 4, total >= 4 and total < 7, total >= 7)

    iy_diff = iy_goals_1 - iy_goals_2
    ms_diff = ms_goals_1 - ms_goals_2
    res['IYMS11'] = iy_diff > 0 and ms_diff > 0
    res['IYMS10'] = iy_diff > 0 and ms_diff == 0
    res['IYMS12'] = iy_diff > 0 and ms_diff < 0
    res['IYMS01'] = iy_diff == 0 and ms_diff > 0
    res['IYMS00'] = iy_diff == 0 and ms_diff == 0
    res['IYMS02'] = iy_diff == 0 and ms_diff < 0
    res['IYMS21'] = iy_diff < 0 and ms_diff > 0
    res['IYMS20'] = iy_diff < 0 and ms_diff == 0
    res['IYMS22'] = iy_diff < 0 and ms_diff < 0

    for i in range(6):
        for j in range(6):
            res['SK%d%d' % (i, j)] = ms_goals_1 == i and ms_goals_2 == j
            if i==5:
                res['SK%d%d' % (i, j)] = ms_goals_1 > 5 and ms_goals_2 == j
            if j==5:
                res['SK%d%d' % (i, j)] = ms_goals_1 == i and ms_goals_2 > 5

    res = concat([res[bet] for bet in BET_ORDER] + [[res[bet] for bet in IYMS_ORDER + SK_ORDER]])

    return res
Example #45
def recovery():
    G = 10000
    config = [G/2]
    mfl = 250
    lamb = 1/float(mfl)
    num_frags = 1000
    frags = concat([chip(G,config,mfl) for i in xrange(num_frags)])
    min_seq_length = 75
    sequenced_frags = filter(lambda (start,stop):stop - start > min_seq_length,frags)
    fd_frags,bk_frags = separate(lambda x:random.random() < 0.5,sequenced_frags)
    fd_reads = [('+',start,start+75) for (start,stop) in fd_frags]
    bk_reads = [('-',stop-75,stop) for (start,stop) in bk_frags]
    reads = fd_reads + bk_reads
    hyp0 = [int(random.random() < 0.5) for i in range(G)]
    def f(hyp):
        return log_likelihood(reads,hyp,lamb,G)
    def prop(hyp):
        i = random.randrange(G)
        hyp_copy = hyp[:]
        hyp_copy[i] = 1 - hyp_copy[i]
        return hyp_copy
    chain = mh(f,prop,hyp0,use_log=True,verbose=True)
Example #46
def prokaryotic_gini_comparison(filename=None):
    """spoof prokaryotic motifs using maxent, uniform and GLE evosims,
    showing gini is higher in GLE than in maxent, uniform"""
    maxent_spoofs = [spoof_motifs_maxent(motif,10,verbose=True)
                     for motif in tqdm(bio_motifs,desc='bio_motifs')]
    uniform_spoofs = [spoof_motifs_uniform(motif,10,verbose=True)
                     for motif in tqdm(bio_motifs,desc='bio_motifs')]
    oo_spoofs = [spoof_motifs_oo(motif,10)
                     for motif in tqdm(bio_motifs,desc='bio_motifs')]
    gle_spoofs = [concat([spoof_motif_gle(motif,10,verbose=True) for i in range(1)])
                  for motif in tqdm(bio_motifs,desc='bio_motifs')]
    maxent_ginis = [mean(map(motif_gini,spoofs)) for spoofs in maxent_spoofs]
    uniform_ginis = [mean(map(motif_gini,spoofs)) for spoofs in uniform_spoofs]
    gle_ginis = [mean(map(motif_gini,spoofs)) for spoofs in gle_spoofs]
    plt.subplot(1,2,1)
    scatter(maxent_ginis,gle_ginis)
    plt.xlabel("MaxEnt")
    plt.ylabel("GLE")
    plt.subplot(1,2,2)
    plt.xlabel("TU")
    scatter(uniform_ginis,gle_ginis)
    plt.suptitle("Gini Coefficients for GLE Simulations vs. MaxEnt, TU Distributions")
    maybesave(filename)
Example #47
def make_chip_dataset(num_cells):
    return concat([chip(genome,rfd_xs(ps),MEAN_FRAGMENT_LENGTH) for i in verbose_gen(xrange(num_cells))])
Example #48
 def all_spoof_stats(fname,order_by_stat="motif_ic"):
     ordered_tfs = order_tfs_by(order_by_stat)
     return concat([[results_dict[tf][spoof_name][fname]
                          for spoof_name in spoof_names] for tf in ordered_tfs])
Example #49
    
    # Only decks with these tags in them are processed
    "modelTags" : "Mandarin",

    # Field names are listed in descending order of priority
    "candidateFieldNamesByKey" : utils.let(
            ["MW", "Measure Word", "Classifier", "Classifiers", u"量词"],
            ["Audio", "Sound", "Spoken", u"声音"],
            lambda mwfields, audiofields: {
        'expression' : ["Expression", "Hanzi", "Chinese", "Character", "Characters", u"汉字", u"中文"],
        'reading'    : ["Reading", "Pinyin", "PY", u"拼音"],
        'meaning'    : ["Meaning", "Definition", "English", "German", "French", u"意思", u"翻译", u"英语", u"法语", u"德语"],
        'audio'      : audiofields,
        'color'      : ["Color", "Colour", "Colored Hanzi", u"彩色"],
        'mw'         : mwfields,
        'mwaudio'    : utils.concat(utils.concat([[[x + " " + y, x + y] for x in mwfields] for y in audiofields])),
        #'weblinks'   : ["Links", "Link", "LinksBar", "Links Bar", "Link Bar", "LinkBar", "Web", "Dictionary", "URL", "URLs"],
        #'pos'        : ["POS", "Part", "Type", "Cat", "Class", "Kind", "Grammar"] ,
        'trad'       : ["Traditional", "Trad", "Traditional Chinese", "HK", u'繁体字', u'繁体', u"繁體字", u"繁體"],
        'simp'       : ["Simplified", "Simp", "Simplified Chinese", u"简体字", u"简体"]
      })
}

updatecontrolflags = {
    'expression' : None,
    'reading'    : "readinggeneration",
    'meaning'    : "meaninggeneration",
    'mw'         : "detectmeasurewords",
    'audio'      : "audiogeneration",
    'mwaudio'    : "mwaudiogeneration",
    'color'      : "colorizedcharactergeneration",
Example #50
def interpret_gle_evo_sim_spoofs(bio_motifs_, spoofs_,filename=None):
    # assume that structure of spoofs is such that all spoofs for bio_motifs[0] are contained in spoofs[0]
    trials_per_motif = len(spoofs_[0])
    bio_motifs = [bio_motif for bio_motif in bio_motifs_ for i in range(trials_per_motif)]
    sim_motifs = concat(spoofs_)
    print len(bio_motifs), len(sim_motifs)
    assert len(bio_motifs) == len(sim_motifs)
    # bio_ics = [motif_ic(motif) for motif in bio_motifs
    #            for _ in range(trials_per_motif)]
    bio_ics = map(motif_ic, bio_motifs)
    sim_ics = map(motif_ic, sim_motifs)
    # sim_ics = [mean(map(motif_ic,motifs))
    #            for spoof in spoofs for motifs in spoof]
    # bio_ginis = [motif_gini(motif) for motif in bio_motifs
    #            for _ in range(trials_per_motif)]
    # sim_ginis = [mean(map(motif_gini,motifs))
    #              for spoof in spoofs for motifs in spoof]
    bio_ginis = map(motif_gini,bio_motifs)
    sim_ginis = map(motif_gini,sim_motifs)
    # bio_log_mis = [log(total_motif_mi(motif)) for motif in bio_motifs
    #            for _ in range(trials_per_motif)]
    # sim_log_mis = map(log,[mean(map(total_motif_mi,motifs))
    #            for spoof in tqdm(spoofs) for
    #            motifs in spoof])
    lens = [len(motif[0]) for motif in bio_motifs]
    # bio_mis = [total_motif_mi(motif)/choose(l,2)
    #            for (l, motif) in zip(lens, bio_motifs)]
    # sim_mis = [total_motif_mi(motif)/choose(l,2)
    #            for (l, motif) in zip(lens, spoofs)]
    print "finding mutual information"
    bio_mis = [total_motif_mi(motif)/choose(l,2) for (l, motif) in tqdm(zip(lens, bio_motifs))]
    sim_mis = [total_motif_mi(motif)/choose(l,2) for (l, motif) in tqdm(zip(lens, sim_motifs))]
    print "finding motif structures"
    bio_patterns_ = [find_pattern(motif)[0] for motif in tqdm(bio_motifs_)]
    bio_patterns = [pattern for pattern in bio_patterns_ for _ in xrange(trials_per_motif)]
    pattern_colors = {'direct-repeat':'g','inverted-repeat':'b','single-box':'r'}
    colors = [pattern_colors[p] for p in bio_patterns]
    plt.subplot(1,3,1)
    plt.title("Motif IC (bits)") 
    scatter(bio_ics,sim_ics,color=colors,
            line_color='black')
    ic_f = poly1d(polyfit(bio_ics, sim_ics,1))
    #plt.plot(*pl(ic_f,[min(bio_ics),max(bio_ics)]),linestyle='--',color='b')
    plt.xlim(*find_limits(bio_ics, sim_ics))
    plt.ylim(*find_limits(bio_ics, sim_ics))
    plt.ylabel("Simulated")
    plt.subplot(1,3,2)
    plt.xlabel("Observed")
    plt.title("Gini Coefficient")
    scatter(bio_ginis,sim_ginis,color=colors,
            line_color='black')
    gini_f = poly1d(polyfit(bio_ginis, sim_ginis,1))
    #plt.plot(*pl(gini_f,[min(bio_ginis),max(bio_ginis)]),
     #        linestyle='--',color='b')
    plt.xlim(*find_limits(bio_ginis, sim_ginis))
    plt.ylim(*find_limits(bio_ginis, sim_ginis))
    plt.subplot(1,3,3)
    plt.title("Pairwise MI per pair (bits)")
    draft = False
    end = 10 if draft else 108
    scatter(bio_mis,sim_mis,color=colors,
            line_color='black')
    mi_f = poly1d(polyfit(bio_mis, sim_mis,1))
    # plt.plot(*pl(mi_f,[min(bio_mis),max(bio_mis)]),
    #          linestyle='--',color='b')
    plt.xlim(*find_limits(bio_mis, sim_mis))
    plt.ylim(*find_limits(bio_mis, sim_mis))
    plt.legend()
    # #ax.set_bg_color('none')
    # ax.set_xlabel("Biological")
    # ax.set_ylabel("Simulated")
    plt.tight_layout()
    maybesave(filename)
Example #51
def chip_ps_ref(ps,mean_frag_length,cells=10000):
    """Do a chip seq experiment given the distribution ps"""
    G = len(ps)
    return concat(chip_ps(rfd_xs(ps),mean_frag_length)
                  for cell in verbose_gen(xrange(cells)))
Example #52
def chip_ps_spec(ps,mean_frag_length,cells=10000):
    return concat(chip_ps_spec_single_cell(ps,mean_frag_length)
                  for i in verbose_gen(xrange(cells)))
Example #53
def show_chip_shadow(G,endpoints,mean_frag_length,cells=10000,trials=10):
    lamb = 1.0/mean_frag_length
    [plt.plot(map_reads(concat([chip(G,endpoints,mean_frag_length) for i in range(cells)]),G),color='b')
     for i in verbose_gen(range(trials))]
Example #54
def chip_ps_np(ps,mean_frag_length,cells=10000,verbose=False):
    """Do a chip seq experiment given the distribution ps"""
    w = 10
    G = len(ps)# + w - 1 #XXX HACK
    cell_iterator = verbose_gen(xrange(cells),modulus=1000) if verbose else xrange(cells)
    return concat(chip(G,rfd_xs_np(ps),mean_frag_length) for cell in cell_iterator)
Example #55
        print "%i %i %i\t %.2f %.2f %.4f %.4f %.2f - %.2fm" % (
            n,m,l,
            np.mean(scores[(m,l)]), np.std(scores[(m,l)]),  # for best params & variance
            np.mean(col_trscores), np.mean(col_cvscores),   # use x diagnostic training set overfit
            tn.dot(np.mean(ptscores[(m,l)],axis=0)[1:]),    # score
            (time()-t)/60),                                 # k-fold time
        print " ".join(["%.5f" %pts for pts in np.mean(ptscores[(m,l)],axis=0)]),
        print " ".join(["%.5f" %pts for pts in np.std(ptscores[(m,l)],axis=0)])
        
    if submit:
        # MAKE SUBMISSION
        # very complicated way to keep only the latest shopping_pt for each customer just to have everything in one row!!!!!11
        test = test[test.shopping_pt == test.reset_index().customer_ID.map(test.reset_index().groupby('customer_ID').shopping_pt.max())]
        Xt = test[con+cat+conf+extra]

        # TEST SET PREDICTION
        print "now predicting on test set...",
        allpreds = rfs.predict(Xt)
        test['pG'] = majority_vote(test.G,allpreds[:,selected]); print "done"
        
        # Fix state law products, then concatenate to string
        stateFix(encoders,test,['C','D','pG'],1)
        test['plan'] = concat(test,['A','B','C','D','E','F','pG'])
        test['plan'].to_csv('submission\\majority_rfs%i_%i.%i_shuffle_GAfix_%iof%iof%i.csv' % (
            n,m,l,NS/2+1,NS,N),header=1)

        # features importances
        impf = rfs.impf; impf.sort()
        

Example #56
def occs_from_direct_sampling(samples,ks):
    """Given a _list_ of samples and ks, compute occupancies"""
    num_samples = float(len(samples))
    counts = Counter(concat(samples))
    G = len(ks)
    return [counts[i]/num_samples for i in range(G)]
Example #57
File: hmm.py Project: poneill/amic
def test_baum_welch():
    site = [0,1,2,3,0,1,2,3]
    L = 8
    background = lambda n:[random.choice(range(4)) for i in range(n)]
    obs = concat([site + background(10) for i in range(100)])
    return baum_welch(obs,L)
Example #58
def make_frag_lengths(lamb,trials):
    return frag_lengths(concat([make_frags(lamb) for trial in range(trials)]))