Code Example #1
File: overlap.py  Project: pwang724/PHD_experiment
def plot_overlap_water(res, start_days, end_days, figure_path):
    ax_args_copy = overlap_ax_args.copy()
    res = copy.copy(res)
    mice = np.unique(res['mouse'])
    res = filter.filter_days_per_mouse(res, days_per_mouse=end_days)
    add_naive_learned(res, start_days, end_days)
    ax_args_copy.update({'xlim': [-1, 2]})
    y_keys = ['US/CS+', 'CS+/US']
    summary_res = defaultdict(list)
    for arg in y_keys:
        _get_overlap_water(res, arg=arg)
        new_res = reduce.new_filter_reduce(
            res,
            filter_keys=['mouse', 'day', 'odor_valence'],
            reduce_key='Overlap')
        new_res['Type'] = np.array([arg] * len(new_res['training_day']))
        reduce.chain_defaultdicts(summary_res, new_res)

    summary_res.pop('Overlap_sem')
    summary_res.pop('Overlap_std')
    summary_res = filter.filter(summary_res, {'odor_valence': 'CS+'})
    mean_std_res = reduce.new_filter_reduce(summary_res,
                                            filter_keys='Type',
                                            reduce_key='Overlap')
    types = np.unique(summary_res['Type'])
    scatter_args_copy = scatter_args.copy()
    scatter_args_copy.update({'s': 2, 'alpha': .6})
    for i, type in enumerate(types):
        reuse_arg = True
        if i == 0:
            reuse_arg = False
        temp = filter.filter(summary_res, {'Type': type})
        plot.plot_results(temp,
                          x_key='Type',
                          y_key='Overlap',
                          loop_keys='mouse',
                          colors=['Black'] * len(mice),
                          plot_function=plt.scatter,
                          path=figure_path,
                          plot_args=scatter_args_copy,
                          ax_args=ax_args_copy,
                          save=False,
                          reuse=reuse_arg,
                          fig_size=(1.5, 1.5),
                          rect=(.25, .25, .6, .6),
                          legend=False)

    plot.plot_results(mean_std_res,
                      x_key='Type',
                      y_key='Overlap',
                      error_key='Overlap_sem',
                      path=figure_path,
                      plot_function=plt.errorbar,
                      plot_args=error_args,
                      ax_args=ax_args,
                      save=True,
                      reuse=True,
                      fig_size=(1.5, 1.5),
                      legend=False)
    print(mean_std_res['Overlap'])
Code Example #2
File: weixin_process.py  Project: demon108/stream
def process():
    conn_old = mysql.connect('bsppr', '192.168.241.7')
    mysql.insert(conn_old,'set names utf8')
    conn_new = mysql.connect('bsppr', '192.168.241.32')
    mysql.insert(conn_new,'set names utf8')
    cinfos_old = get_cinfos_moa(conn_old)
    cinfos_new = get_cinfos_moa(conn_new)
    mongo_conn = get_mongo_conn()
    tablename = 'weixin'
    while True:
        tmpdatas = mongo.find(mongo_conn, tablename, {}, 1000)
        rawdatas = []
        for raw in tmpdatas:
            url = raw['url']
            mongo.delete(mongo_conn, tablename, {'url': url})
            date = raw['pubtime']
            now = datetime.datetime.now()
            diff = now - date
            if diff.days > 2:
                continue
            rawdatas.append(raw)
        if len(rawdatas) == 0:
            print 'wait datas...'
            time.sleep(300)
        raw_old_qualified = filter(cinfos_old, rawdatas)
        if raw_old_qualified:
            old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified, 'old')
            print 'old_insert_num: ', old_insert_num
        raw_new_qualified = filter(cinfos_new, rawdatas)
        if raw_new_qualified:
            new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified, 'new')
            print 'new_insert_num: ', new_insert_num
Code Example #3
File: test_process.py  Project: easyshell/stream
def process():
    conn_old = mysql.connect('bsppr', '192.168.241.7')
    mysql.insert(conn_old, 'set names utf8')
    conn_new = mysql.connect('bsppr', '192.168.241.32')
    mysql.insert(conn_new, 'set names utf8')
    cinfos_old = get_cinfos(conn_old)
    #print cinfos_old
    cinfos_new = get_cinfos(conn_new)
    mongo_conn = get_mongo_conn()
    tablename = 'weixin'
    tmpdatas = mongo.find(mongo_conn, tablename, {}, 50)
    rawdatas = []
    for raw in tmpdatas:
        date = raw['pubtime']
        now = datetime.datetime.now()
        diff = now - date
        print diff.days
        rawdatas.append(raw)
    if len(rawdatas) == 0:
        time.sleep(10)
    raw_old_qualified = filter(cinfos_old, rawdatas)
    old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified,
                                                   'old')
    raw_new_qualified = filter(cinfos_new, rawdatas)
    new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified,
                                                   'new')
Code Example #4
File: overlap.py  Project: pwang724/PHD_experiment
def _get_overlap_water(res, arg):
    def _helper(list_of_name_ix_tuple, desired_tuple):
        for tuple in list_of_name_ix_tuple:
            if tuple[0] == desired_tuple:
                ix = tuple[1]
                assert len(ix) == 1, 'more than 1 unique entry'
                return ix[0]

    res['Overlap'] = np.zeros(res['day'].shape)
    names, ixs = filter.retrieve_unique_entries(
        res, ['mouse', 'day', 'odor_standard'])
    list_of_name_ix_tuples = list(zip(names, ixs))

    mice = np.unique(res['mouse'])
    for mouse in mice:
        mouse_res = filter.filter(res, filter_dict={'mouse': mouse})
        days = np.unique(mouse_res['day'])
        for day in days:
            mouse_day_res = filter.filter(mouse_res, filter_dict={'day': day})
            odors = np.unique(mouse_day_res['odor_standard'])
            if 'US' in odors:
                us_ix = _helper(list_of_name_ix_tuples, (mouse, day, 'US'))
                us_cells = np.where(res['sig'][us_ix])[0]
                for odor in odors:
                    odor_ix = _helper(list_of_name_ix_tuples,
                                      (mouse, day, odor))
                    odor_cells = np.where(res['sig'][odor_ix])[0]
                    if arg == 'US/CS+':
                        overlap = _overlap(us_cells, odor_cells, arg='over')
                    elif arg == 'CS+/US':
                        overlap = _overlap(odor_cells, us_cells, arg='over')
                    else:
                        raise ValueError('overlap arg not recognized')
                    res['Overlap'][odor_ix] = overlap
Code Example #5
def write_data(path):
	import filter
	from pyspark.mllib.feature import Word2Vec, Word2VecModel

	# load data
	loc = '/user/rmusters/text/2015/01/*'
	text_file = sc.textFile(loc)
	data = text_file.map(lambda line: filter.filter(line).split(" "))

	# load model
	word2vec = Word2Vec()
	model = Word2VecModel.load(sc, '/user/rmusters/2015model99')

	# get a tweet vector pair.
	from pyspark.sql import SQLContext
	sqlContext = SQLContext(sc)
	lookup = sqlContext.read.parquet('/user/rmusters/2015model99/data').alias("lookup")
	lookup_bd = sc.broadcast(lookup.rdd.collectAsMap())

	vectors = data.map(lambda ws: [lookup_bd.value.get(w) for w in ws])
	logger.info(vectors.count())

	data = text_file.map(lambda line: (line, filter.filter(line).split(" ")))\
							.map(lambda (text, filtered): (text, filtered, [lookup_bd.value.get(w) for w in filtered][0]))

	from pyspark.sql.functions import monotonicallyIncreasingId
	df = data.toDF(["text", "filtered_text", "vectors"])
	# This will return a new DF with all the columns + id
	res = df.withColumn("id", monotonicallyIncreasingId())
	res.write.parquet(path, mode="overwrite")
Code Example #6
File: graph.py  Project: revan/BusOccupancy
def graph(infile, graph_type, router_filtering, strength_filtering,
          missing_strength_filtering, end_time, coincidence, binsize, name,
          use_labels):

    jason = json.load(infile)
    infile.close()
    jason["packets"] = pd.DataFrame(jason["packets"])
    jason["packets"]["time"] /= 1000000

    filter(jason, router_filtering, strength_filtering,
           missing_strength_filtering, end_time)
    jason["last"] = jason["packets"]["time"].iget(-1)

    if(graph_type == "unique"):
        plotUnique(jason, binsize=binsize, labels=use_labels)
    elif(graph_type == "packets"):
        plotPackets(jason, binsize=binsize, labels=use_labels)
    elif(graph_type == "grid"):
        plotGrid(jason, coincidence=coincidence, name=name, labels=use_labels)
    elif(graph_type == "packethist"):
        plotPacketHistogram(jason)
    elif(graph_type == "strhist"):
        plotStrengthHistogram(jason)
    elif(graph_type == "segments"):
        plotSegments(jason, name=name, labels=use_labels)
    elif(graph_type == "vectors"):
        plotVectors(jason, name=name, labels=use_labels)
Code Example #7
File: pack.py  Project: mtanneau/cblib-base
def pack(packname, filtexpr, setexpr, packall):
    # tarfile 'filter' requires v2.7
    if sys.version_info < (2, 7):
        raise Exception('Python 2.7 or later required..')

    # Get the root directory of cblib
    scriptdir = os.path.split(inspect.getfile(inspect.currentframe()))[0]
    rootdir = os.path.join(scriptdir, '..', '..')

    if not packall and setexpr != None:
        if os.path.isfile(setexpr):
            rootdir = os.path.dirname(setexpr)
        else:
            rootdir = setexpr

    # Find all instances
    files = list()
    cbfset = CBFset()
    cbfset.read(setexpr)
    filter(filtexpr, None, cbfset, lambda x: files.append(x))

    if packall:
        # Find all instance information
        files = files + glob.glob(os.path.join(rootdir, 'instances', '*.csv'))
        files = files + glob.glob(os.path.join(rootdir, 'instances', '*.bib'))

        # Find all source files from 'tools'
        files = files + glob.glob(os.path.join(rootdir, 'tools', '*.c'))
        files = files + glob.glob(os.path.join(rootdir, 'tools', '*.h'))
        files = files + glob.glob(os.path.join(rootdir, 'tools', 'Makefile.*'))

        # Find all documents from 'docs'
        files = files + glob.glob(os.path.join(rootdir, 'docs', '*.pdf'))

        # Find all python files from 'scripts'
        files = files + glob.glob(os.path.join(rootdir, 'scripts', '*.py'))
        files = files + glob.glob(
            os.path.join(rootdir, 'scripts', 'admin', '*.py'))
        files = files + glob.glob(
            os.path.join(rootdir, 'scripts', 'data', '*.py'))
        files = files + glob.glob(
            os.path.join(rootdir, 'scripts', 'dist', '*.py'))
        files = files + glob.glob(
            os.path.join(rootdir, 'scripts', 'filters', '*.py'))
        files = files + glob.glob(
            os.path.join(rootdir, 'scripts', 'solvers', '*.py'))

        # Find all other important files
        files.append(os.path.join(rootdir, 'README'))
        files.append(os.path.join(rootdir, 'instances', 'cbf', 'README'))

    # Create compressed tar file
    print('Writing ' + packname + '.tar.gz')
    tar = tarfile.open(os.path.join(scriptdir, packname + '.tar.gz'), 'w:gz')
    for f in files:
        extractname = os.path.join(packname, os.path.relpath(f, rootdir))
        print(extractname)
        tar.add(f, arcname=extractname, filter=addwritepermission)
    tar.close()
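The tar.add call above relies on tarfile's filter argument (the reason for the Python 2.7 check at the top of the function): a callable that receives each TarInfo, may modify it, and returns it, or returns None to drop that member. The addwritepermission helper itself is not shown in this snippet; the sketch below is only a guess at its body based on its name, assuming it simply forces owner write permission on every archived file.

import stat

def addwritepermission(tarinfo):
    # make sure the owner can write the file once it is extracted
    tarinfo.mode |= stat.S_IWUSR
    return tarinfo

# used as: tar.add(f, arcname=extractname, filter=addwritepermission)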
Code Example #8
File: user_test.py  Project: tt2lzx/DND_QQ_ROBOT
 def tearDownClass(self):
     # must use the @classmethod decorator; runs once after all the tests have finished
     test_content = {}
     test_content['message'] = '.drop 天子'
     test_content['sender'] = sender
     filter(test_content)
     test_content['message'] = ''
     print('测试结束')
Code Example #9
def _hist(res, save_path):
    ## distribution
    def histogram(real, label, bin, range, ax):
        density, bins = np.histogram(real, bins=bin, density=True, range=range)
        unity_density = density / density.sum()
        widths = bins[:-1] - bins[1:]
        ax.bar(bins[1:], unity_density, width=widths, alpha=.5, label=label)

    for mouse in np.unique(res['mouse']):
        pt_csp = filter.filter(res, {'mouse': mouse, 'odor_valence': 'PT CS+'})
        csp = filter.filter(res, {'mouse': mouse})
        csm = filter.filter(res, {'mouse': mouse})

        data = pt_csp['velocity']
        start = pt_csp['on'][0]
        end = pt_csp['end'][0]
        data_before = data[:, :start].flatten()
        data_during = data[:, start:end].flatten()
        data_after = data[:, end:].flatten()

        bins = 50
        range = [-70, 70]
        fig = plt.figure(figsize=(2, 1.5))
        ax = fig.add_axes([0.2, 0.2, 0.7, 0.7])
        histogram(data_before, 'before', bin=bins, range=range, ax=ax)
        histogram(data_during, 'during', bin=bins, range=range, ax=ax)
        plt.xlim([range[0] - 0.5, range[1] + .5])
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        rs = ranksums(data_before, data_during)[-1]
        xlim = plt.xlim()
        ylim = plt.ylim()
        x = xlim[0] + .7 * (xlim[1] - xlim[0])
        y = ylim[0] + .7 * (ylim[1] - ylim[0])
        plot.significance_str(x, y, rs)
        name = 'before_during_mouse_{}'.format(mouse)
        plot._easy_save(save_path, name=name)

        fig = plt.figure(figsize=(2, 1.5))
        ax = fig.add_axes([0.2, 0.2, 0.7, 0.7])
        histogram(data_during, 'before', bin=bins, range=range, ax=ax)
        histogram(data_after, 'during', bin=bins, range=range, ax=ax)
        plt.xlim([range[0] - 0.5, range[1] + .5])
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        rs = ranksums(data_during, data_after)[-1]
        xlim = plt.xlim()
        ylim = plt.ylim()
        x = xlim[0] + .7 * (xlim[1] - xlim[0])
        y = ylim[0] + .7 * (ylim[1] - ylim[0])
        plot.significance_str(x, y, rs)
        name = 'during_after_mouse_{}'.format(mouse)
        plot._easy_save(save_path, name=name)
Code Example #10
def process_commits(features):
    commits_dict = gitable.dumpCommits()
    weeks = dict()
    all = dict()

    for author, commits in commits_dict.iteritems():
        all[author] = len(commits[1:])
        for week in commits[0].keys():
            if not weeks.get(week):
                weeks[week] = dict()
            week_count = weeks.get(week)

            num= commits[0].get(week)
            week_count[author]  = num
            weeks[week]  = week_count
    weekly = weeks.keys()
    weekly.sort()
    weekly_count = dict()
    sorted_week_count = []
    for week in weekly:

        commit_week = filter.filter(weeks.get(week))
        large = commit_week.large(0.25, percent=True)
        small = commit_week.small(0.1,percent = True)
        weekly_count[week]  = commit_week.sum()
        sorted_week_count.append(commit_week.sum())
        print('%s, %d' %(week, commit_week.sum()))
        #logger.info(large)
        #logger.info(small)

    # Uneven work of weeks
    draw_bar(sorted_week_count,"commits per week","commits",range(len( sorted_week_count)),"week",0.35)
    week_filter = filter.filter(weekly_count)
    features['commits_week']= week_filter
    small_weeks = week_filter.small()
    if len(small_weeks)>0:
        features['low commits during the gap time']= small_weeks
        logger.info(small_weeks)
    large_weeks  =week_filter.large()
    if len(large_weeks)>0:
        features['extra large work during the week'] = large_weeks
        #logger.info(large_weeks)
    #logger.info(large_weeks)


    # Uneven contribute of workers
    draw_bar(all.values(), "commits number posted by person","issues", range(len(all.keys())),"person",0.35)
    contribution_filter =filter.filter(all)
    features['commits_person'] = contribution_filter
    leader  = contribution_filter.large(delta=1)
    if len(leader)>0 :
        features['large commits by single user']  =  leader

        #logger.info("Project has leader %s" %(leader))
    passenger  = contribution_filter.small(delta=1)
    if len(passenger)>0 :
        features['small commits by single user']= passenger
Code Example #11
File: main.py  Project: lublic/iSales_1
def get_revenue():
    # load the revenue data
    revenues = daten.daten_laden("umsatz.json")

    # create the list that will hold the filtered revenues
    revenues_filtered = revenues

    # "Alle" als Dropdown beim Filter auswählen
    selected_jahr = selected_kunde = selected_lieferant = "Alle"

    # if a filter was submitted...
    if request.method == 'POST':
        # drop all revenues that are not wanted
        revenues_filtered = filter.filter(revenues_filtered, 'jahr', request.form['jahr'])
        revenues_filtered = filter.filter(revenues_filtered, 'lieferant', request.form['lieferant'])
        revenues_filtered = filter.filter(revenues_filtered, 'kunde', request.form['kunde'])

        # select the matching dropdown item for each filter
        selected_jahr = request.form['jahr']
        selected_kunde = request.form['kunde']
        selected_lieferant = request.form['lieferant']

    # build the lists for the dropdown filters
    filter_list_jahr = filter.getFilterList(revenues, 'jahr', selected_jahr)
    filter_list_lieferant = filter.getFilterList(revenues, 'lieferant', selected_lieferant)
    filter_list_kunde = filter.getFilterList(revenues, 'kunde', selected_kunde)

    # list for the filtered revenues
    sumlist = []

    # add the filtered revenues to sumlist
    for k, v in revenues_filtered.items():
        # append the item to the list
        sumlist.append(v['umsatz'])

    # compute the total of the filtered revenues
    summe_umsatz = sum(sumlist)

    # list for the filtered years
    yearlist = []

    # add the filtered years to yearlist
    for k, v in revenues_filtered.items():
        # append the item to the list
        yearlist.append(v['jahr'])

    # visualize the revenues (source: https://plotly.com/python/bar-charts/)
    if sumlist:
        fig = px.bar(x=yearlist, y=sumlist, title="Grafische Abbildung der Umsätze", labels=dict(x="Jahre", y="Umsätze in CHF"), barmode='group')
    else:
        fig = px.bar(x=None, y=None, title="Grafische Abbildung der Umsätze", labels=dict(x="Jahre", y="Umsätze in CHF"))

    div = plotly.io.to_html(fig, include_plotlyjs=True, full_html=False)

    # output for the HTML results page
    return render_template('datenausgabe.html', revenues=revenues_filtered, filter_list_jahr=filter_list_jahr, filter_list_lieferant=filter_list_lieferant, filter_list_kunde=filter_list_kunde, summe_umsatz = summe_umsatz, viz_div = div)
Code Example #12
File: plot_fv.py  Project: cbosoft/pi_rheo_proj
def check(filename, viscosity, fillvol):
    ########### Check the calibration using the water run
    # Reads the data

    # Geometry of the couette cell

    roo = 0.044151 / 2.0  # outer cell outer radius in m
    ro = 0.039111 / 2.0  # outer cell radius in m
    ri = 0.01525  # inner cell radius in m

    icxsa = np.pi * (ri**2)
    ocxsa = np.pi * (ro**2)
    dxsa = ocxsa - icxsa  # vol per height in m3/m
    dxsa = dxsa * 1000  # l / m
    dxsa = dxsa * 1000  # ml / m

    fill_height = fillvol / dxsa

    datf = pd.read_csv(filename)

    stw = datf['t']
    stw = stw - stw[0]
    dr = datf['dr']
    cr = datf['cr']
    cr2a = datf['cr2a']
    cr2b = datf['cr2b']
    pv = datf['pv']

    # Filter noise from data
    dr = filter(stw, dr, method="butter", A=2, B=0.001)
    cr = filter(stw, cr, method="butter", A=2, B=0.001)
    cr2a = filter(stw, cr2a, method="butter", A=2, B=0.001)
    cr2b = filter(stw, cr2b, method="butter", A=2, B=0.001)

    # Calculate viscosity
    musw = [viscosity] * len(cr)
    cu = 16.573 * cr - 29.778
    cu2a = 11.307 * cr2a - 29.066
    cu2b = 11.307 * cr2b - 29.066
    cu = np.array((cu + cu2a + cu2b) / 3)
    cub = 0.00229473 * pv + 0.48960784
    sp_rpms = dr * 316.451 - 163.091
    sp_rads = (sp_rpms * 2 * np.pi) / 60
    sn_rpms = 5.13 * pv + 15.275
    vo = 0.0636 * pv + 2.423

    #T calibration
    gam_dotw = (sp_rads * ri) / (ro - ri)
    #Tw_fc       = eff[0] * (cu - cub) + eff[1]
    Tw_fc = eff[0] * ((cu - cub) * vo) + eff[1]
    tauw_fc = Tw_fc / (2 * np.pi * ri * ri * fill_height)
    muw_fc = tauw_fc / gam_dotw
    return stw, muw_fc, musw
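The geometry constants and comments at the top of check() contain everything needed to turn a measured torque into a viscosity; the relations used in the last few lines can be restated compactly as below. This is only a rewording of the formulas already in the function, with the same narrow-gap Couette assumptions, not an independent calibration.

import numpy as np

def couette_viscosity(T, omega, ri=0.01525, ro=0.039111 / 2.0, fill_height=0.05):
    # shear rate at the inner cylinder for a narrow-gap Couette cell
    gam_dot = (omega * ri) / (ro - ri)
    # shear stress from the torque on the wetted inner-cylinder surface
    tau = T / (2 * np.pi * ri * ri * fill_height)
    # apparent viscosity
    return tau / gam_dot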
Code Example #13
def get_compare_responsive_sig(res):
    key = 'ssig'

    def _helper(res):
        assert res['odor_valence'][0] == 'CS+', 'wrong odor'
        assert res['odor_valence'][1] == 'CS-', 'wrong odor'
        on = res['DAQ_O_ON_F'][0]
        off = res['DAQ_W_ON_F'][0]
        sig_p = res[key][0]
        sig_m = res[key][1]
        dff_p = res['dff'][0]
        dff_m = res['dff'][1]
        sig_p_mask = sig_p == 1
        sig_m_mask = sig_m == 1
        dff_mask = dff_p - dff_m
        dff_mask = np.mean(dff_mask[:, on:off], axis=1)
        p = [a and b for a, b in zip(sig_p_mask, dff_mask > 0)]
        m = [a and b for a, b in zip(sig_m_mask, dff_mask < 0)]
        return np.array(p), np.array(m)

    mice = np.unique(res['mouse'])
    res = filter.filter(res, filter_dict={'odor_valence': ['CS+', 'CS-']})
    sig_res = reduce.new_filter_reduce(
        res, reduce_key=key, filter_keys=['mouse', 'day', 'odor_valence'])
    dff_res = reduce.new_filter_reduce(
        res, reduce_key='dff', filter_keys=['mouse', 'day', 'odor_valence'])
    sig_res['dff'] = dff_res['dff']

    new_res = defaultdict(list)
    for mouse in mice:
        mouse_res = filter.filter(sig_res, filter_dict={'mouse': mouse})
        days = np.unique(mouse_res['day'])
        p_list = []
        m_list = []
        for i, day in enumerate(days):
            mouse_day_res = filter.filter(mouse_res, filter_dict={'day': day})
            p, m = _helper(mouse_day_res)
            new_res['mouse'].append(mouse)
            new_res['mouse'].append(mouse)
            new_res['day'].append(day)
            new_res['day'].append(day)
            new_res['odor_valence'].append('CS+')
            new_res['odor_valence'].append('CS-')
            new_res[key].append(p)
            new_res[key].append(m)
            new_res['Fraction'].append(np.mean(p))
            new_res['Fraction'].append(np.mean(m))
            p_list.append(p)
            m_list.append(m)
    for key, val in new_res.items():
        new_res[key] = np.array(val)
    return new_res
Code Example #14
def estimate_bandpass(data):
    """ Estimate bandpass by rolling median over time
    
    data (np.ma.array): data array with axes (freq, time)
    window (int): size of moving window over which to compute
                  bandpass estimated by median.
    
    TODO: Fit a polynomial instead?
    """

    est = filter(data, params.st_bp_window_f, axis=0)
    est = filter(est, params.st_bp_window_t, axis=1)

    return est
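The filter call used here is whatever rolling-median helper the surrounding module provides; the docstring only says that it takes a window and works along one axis. As a rough stand-in (assuming a plain moving median is acceptable, that the window sizes are odd integers, and ignoring the mask of the np.ma.array), the same two-pass estimate can be sketched with SciPy:

import numpy as np
from scipy.ndimage import median_filter

def estimate_bandpass_sketch(data, window_f=33, window_t=33):
    # rolling median along the frequency axis, then along the time axis
    est = median_filter(np.asarray(data), size=(window_f, 1))
    est = median_filter(est, size=(1, window_t))
    return est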
Code Example #15
File: driver.py  Project: jhford/manifest-utilities
def main():
    parser = optparse.OptionParser("%prog - I diff repo manifests")
    parser.add_option("--filter", dest="filters", action="append")
    parser.add_option("--diff", "-d", dest="diff", action="store_false", default=True)
    parser.add_option("--format", dest="out_format", default="report")
    parser.add_option("--output", dest="output", default=None)
    parser.add_option("--root", dest="root", default=os.getcwd())
    options, args = parser.parse_args()

    if not options.output:
        output = sys.stdout
    else:
        output = options.output
        if os.path.exists(output):
            print >> sys.stderr, "ERROR: Output file already exists"
            exit(1)
    if len(args) == 0:
        print "Choose a command: diff, cleanup, filter"
        exit(1)
    elif len(args) > 1:
        cmd_args = args[1:]
    else:
        cmd_args = None
    cmd = args[0]
    if cmd == 'diff':
        if len(cmd_args) != 2:
            print >> sys.stderr, "ERROR: must specify exactly two arguments (left and right)"
            exit(1)
        diff(cmd_args[0], cmd_args[1], output=output, output_format=options.out_format, filters=options.filters)
    elif cmd == 'freeze':
        freeze(cmd_args[0], output, options.root,
               gaia_branch='v1-train',
               gecko_branch='gecko-18',
               moz_remotes=['b2g'],
               moz_branch='v1-train')
    elif cmd == 'cleanup':
        if len(cmd_args) != 1:
            print >> sys.stderr, "ERROR: you can only filter one file at a time"
            exit(1)
        cleanup(cmd_args[0], output, options.filters)
    elif cmd == 'filter':
        if len(cmd_args) != 1:
            print >> sys.stderr, "ERROR: you can only filter one file at a time"
            exit(1)
        if options.filters == None:
            print >> sys.stderr, "ERROR: you must specify filters for the filter command"
            exit(1)
        filter(cmd_args[0], output, options.filters)
Code Example #16
def get_qa(path):
    T = Tools()
    name_list = T.read_file_or_dir(path)
    for txt_file in name_list:
        f = filter(txt_file)
        zt_word,title_list,__1_dict = f.analayis()
        f.get_qa(T,zt_word,title_list,__1_dict)
Code Example #17
def agglomerate_days(res, condition, first_day, last_day):
    mice = np.unique(res['mouse'])
    out = defaultdict(list)

    for i, mouse in enumerate(mice):
        if hasattr(condition, 'csp'):
            odors = condition.odors[mouse]
        else:
            odors = condition.dt_odors[mouse] + condition.pt_odors[mouse]
        for odor in odors:
            filter_dict = {
                'mouse': mouse,
                'day': np.arange(first_day[i], last_day[i] + 1),
                'odor': odor
            }
            filtered_res = filter.filter(res, filter_dict)

            keys = [
                'lick', 'lick_collection', 'lick_baseline', 'time_first_lick',
                'time_first_lick_collection', 'lick_5s', 'lick_com'
            ]
            temp_res = reduce_by_concat(filtered_res,
                                        'lick',
                                        rank_keys=['day', 'ix'])
            for k in keys:
                _ = reduce_by_concat(filtered_res, k, rank_keys=['day', 'ix'])
                temp_res[k] = _[k]

            temp_res['day'] = np.array(sorted(filtered_res['day']))
            temp_res['trial'] = np.arange(len(temp_res['lick']))
            if len(temp_res['lick']):
                append_defaultdicts(out, temp_res)
    for key, val in out.items():
        out[key] = np.array(val)
    return out
Code Example #18
def new_filter_reduce(res, filter_keys, reduce_key, regularize='min'):
    out = defaultdict(list)
    if isinstance(filter_keys, str):
        filter_keys = [filter_keys]
    unique_combinations, ixs = filter.retrieve_unique_entries(res, filter_keys)
    for v in unique_combinations:
        filter_dict = {
            filter_key: val
            for filter_key, val in zip(filter_keys, v)
        }
        cur_res = filter.filter(res, filter_dict)

        if len(cur_res[reduce_key]):
            try:
                if regularize == 'min':
                    _regularize_length(cur_res, reduce_key)
                elif regularize == 'max':
                    _regularize_length_cristian_data(cur_res, reduce_key)
                else:
                    raise ValueError('did not recognize regularize keyword')
            except:
                print('cannot regularize the length of {}'.format(reduce_key))
            temp_res = reduce_by_mean(cur_res, reduce_key)
            append_defaultdicts(out, temp_res)

    bad = []
    for key, val in out.items():
        try:
            out[key] = np.array(val)
        except:
            bad.append(key)
            print('{} could not be reduced'.format(key))
    for badkey in bad:
        out.pop(badkey)
    return out
Code Example #19
def plot_compare_responsive(res, figure_path):
    ax_args_copy = ax_args.copy()
    ax_args_copy.update({
        'ylim': [0, .65],
        'yticks': [0, .2, .4, .6],
        'xticks': list(range(20))
    })
    res = copy.copy(res)
    res = filter.filter(res, {'odor_valence': ['CS+', 'CS-']})
    res_ = get_compare_responsive_sig(res)

    line_args_copy = line_args.copy()
    line_args_copy.update({
        'marker': '.',
        'linestyle': '--',
        'linewidth': .5,
        'alpha': .75
    })

    plot.plot_results(res_,
                      x_key='day',
                      y_key='Fraction',
                      loop_keys=['mouse', 'odor_valence'],
                      colors=['green', 'red'] * 10,
                      path=figure_path,
                      plot_args=line_args_copy,
                      ax_args=ax_args_copy,
                      fig_size=(2, 1.5),
                      legend=False)
Code Example #20
File: character_test.py  Project: tt2lzx/DND_QQ_ROBOT
 def test_gen(self):
     test_content['message'] = '.gen 桃毒'
     filter(test_content)
     test_content['message'] = '.guid'
     filter(test_content)
     test_content['message'] = '.choose 3'
     filter(test_content)
     test_content['message'] = '.guid'
     filter(test_content)
     test_content['message'] = '.drop 桃毒'
     filter(test_content)
     # test_content['message'] = ''
     # filter(test_content)
     # test_content['message'] = ''
     # filter(test_content)
     pass
Code Example #21
def main():
    from pyspark import SparkContext, SparkConf
    import filter

    #spark-submit --py-files master/hadoop/stemmer.py,master/hadoop/filter.py --master yarn --executor-memory 12g --deploy-mode cluster --num-executors 400  master/hadoop/word_count.py

    loc = '/user/rmusters/text/2015/01/*'

    #spark-submit --py-files master/hadoop/stemmer.py,master/hadoop/filter.py --master yarn --executor-memory 32g --deploy-mode cluster --num-executors 1000  master/hadoop/word_count.py
    loc = '/user/rmusters/text/2015/*/*'

    conf = (SparkConf().set("spark.driver.maxResultSize", "0"))

    sc = SparkContext(appName='word_count_filtered', conf=conf)

    text_file = sc.textFile(loc)

    threshold = 10
    counts = text_file.map(lambda line: filter.filter(line)) \
        .flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a + b) \
        .filter(lambda pair:pair[1] >= threshold)\
        .sortBy(lambda x:x[1], ascending=True)

    counts.saveAsTextFile(
        '/user/rmusters/counts_taggedUrl_Mention_Stopwords_Punctuation_ignoreNonAscii_StemmedThreshold10_haha_hashtag2015all'
    )
    print counts.count()
Code Example #22
def plot_max_dff_valence(res, start_days, end_days, figure_path):
    res = copy.copy(res)
    # list_of_days = list(zip(start_days, end_days))
    list_of_days = end_days
    start_end_day_res = filter.filter_days_per_mouse(
        res, days_per_mouse=list_of_days)
    start_end_day_res = filter.filter(start_end_day_res,
                                      {'odor_valence': ['CS+', 'CS-']})
    _max_dff(start_end_day_res)
    start_end_day_res = reduce.new_filter_reduce(
        start_end_day_res,
        filter_keys=['odor_valence', 'mouse'],
        reduce_key='max_dff')
    add_naive_learned(start_end_day_res, start_days, end_days)
    ax_args_copy = ax_args.copy()
    # ax_args_copy.update({'xticks':[res['DAQ_O_ON_F'][-1], res['DAQ_W_ON_F'][-1]], 'xticklabels':['ON', 'US'],
    #                      'ylim':[0, .2]})
    nMice = len(np.unique(res['mouse']))
    # colors = ['Green'] * nMice + ['Red'] * nMice

    # trace_args_copy = trace_args.copy()
    # trace_args_copy.update({'linestyle':'--','alpha':.5, 'linewidth':.75})

    plot.plot_results(start_end_day_res,
                      loop_keys='mouse',
                      x_key='odor_valence',
                      y_key='max_dff',
                      path=figure_path,
                      colors=['gray'] * 10,
                      legend=False,
                      fig_size=(2, 1.5))
Code Example #23
def plot(dataset_filename):
    filtered_tweets = filter.filter(dataset_filename)
    relevant_tweets = classify.classify(filtered_tweets)

    counts_per_time_unit = dict()
    for tweet in relevant_tweets:
        # get python date from the tweet time
        tweet_date = dateparser.parse(tweet.time)

        # get key to uniquely identify the date and hour
        key = tweet_date.strftime("%Y-%m-%d: %H")

        if key in counts_per_time_unit:
            counts_per_time_unit[key] += 1
        else:
            counts_per_time_unit[key] = 1

    dates = sorted(counts_per_time_unit.keys())

    if not os.path.exists('distributions'):
        os.makedirs('distributions')

    # get distribution filename from the data source's filename
    distribution_filename = dataset_filename.split('/')[len(dataset_filename.split('/')) - 1]

    # remove the previous file extension
    distribution_filename = distribution_filename.split('.')[0]
    f = open('distributions/' + distribution_filename + '.csv', 'w')
    for key in dates:
        f.write(str(key) + "," + str(counts_per_time_unit[key]) + "\n")

    print "Successfully generated file", 'distributions/' + distribution_filename + '.csv'
Code Example #24
def filterff(path_file, filter_method="butter", a=1, b=1):
    # load up some noisy data
    logf = open(path_file, "r")
    dat = logf.readlines()
    logf.close()
    
    # sort the loaded data into lists
    t = [0] * 0  # x, time
    s = [0] * 0  # y, speed
    start = 0.0  # start time (since epoch)
    st = [0] * 0 # specific time (time since run begun, seconds)
  

    splt = dat[1].split(",", 5)
    t.append(float(splt[0]))
    s.append(float(splt[2]))
    st.append(0.0)

    for i in range(2, len(dat)):
        splt = dat[i].split(",", 5)
        t.append(float(splt[0]))
        s.append(float(splt[2]))
        st.append(t[i - 1] - t[0])
    
    # Apply filter
    c = filter(t, s, method=filter_method, A=a)
    return st, s, c
Code Example #25
def _example_velocity(res, save_path):
    xkey = 'trial'
    ykey = 'velocity'

    line_args = {'alpha': .5, 'linewidth': .25, 'marker': 'o', 'markersize': 0}
    mouse = 0
    odor = 'PT CS+'
    temp = filter.filter(res, {'odor_valence': odor, 'mouse': mouse})
    start = temp['on'][0]
    off = temp['off'][0]
    end = temp['end'][0]
    ax_args = {
        'xticks': [start, off, end],
        'xticklabels': ['ON', 'OFF', 'US'],
        'ylim': [-5, 100]
    }

    for i, v in enumerate(temp[ykey]):
        v_ = savgol_filter(v, window_length=41, polyorder=0)
        temp[ykey][i] = v_

    plot.plot_results(temp,
                      x_key=xkey,
                      y_key=ykey,
                      loop_keys=['day', 'ix'],
                      select_dict={
                          'odor_valence': odor,
                          'mouse': mouse
                      },
                      colors=['black'] * 200,
                      plot_args=line_args,
                      ax_args=ax_args,
                      legend=False,
                      path=save_path)
Code Example #26
File: user_test.py  Project: tt2lzx/DND_QQ_ROBOT
 def test_ul(self):
     test_content['message'] = '.gen 桃毒'
     filter(test_content)
     test_content['message'] = '.ul'
     filter(test_content)
     test_content['message'] = '.switch 天子'
     filter(test_content)
     test_content['message'] = '.drop 桃毒'
     filter(test_content)
Code Example #27
File: print-reftable.py  Project: mtanneau/cblib-base
def reftable(out, filtexpr, setexpr):
    # Find the directory of this script
    scriptdir = os.path.split(inspect.getfile(inspect.currentframe()))[0]
    rootdir = os.path.join(scriptdir, '..', '..')

    # Default value
    if setexpr == None:
        setexpr = os.path.realpath(
            os.path.abspath(os.path.join(rootdir, 'instances', 'cbf')))

    # Define files
    filemap = dict()
    cbfset = CBFset()
    cbfset.read(setexpr)

    filter.filter(
        filtexpr, None, cbfset, lambda x: files_add(
            cbfset.getpack(x, cbfset.rootdir), cbfset.getinstance(x), filemap))

    # Define sorting
    convert = lambda text: int(text) if text.isdigit() else text
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]

    out.opentable()

    csvpath = os.path.join(rootdir, 'instances', 'ref.csv')
    csvfile = open(csvpath, 'rt')
    try:
        csvdialect = csv.Sniffer().sniff(csvfile.read(), ';\t')
        csvfile.seek(0)

        csvreader = csv.reader(csvfile, csvdialect, quotechar='"')
        next(csvreader)
        for row in csvreader:
            if row[0] in filemap:
                mylst = list(set(row[1].split(', ')) & filemap[row[0]])
                if len(mylst) >= 1:
                    mylst.sort(key=alphanum_key)
                    out.addrow(row[0], mylst, row[2], row[3], row[4])

    except Exception as e:
        print(str(e))
    finally:
        csvfile.close()

    out.closetable()
Code Example #28
File: plot_visco.py  Project: cbosoft/pi_rheo_proj
def calc_T(filename, fill_volume=5, visc=0.001):
    datf = pd.read_csv(filename)

    # Cell geometry
    roo = 0.044151 / 2.0  # outer cell outer radius in m
    ro = 0.039111 / 2.0  # outer cell radius in m
    ri = 0.01525  # inner cell radius in m
    #L = 0.039753 - (roo - ro)  # height of couette cell

    icxsa = np.pi * (ri**2)
    ocxsa = np.pi * (ro**2)
    dxsa = ocxsa - icxsa  # vol per height in m3/m
    dxsa = dxsa * 1000  # l / m
    dxsa = dxsa * 1000  # ml / m

    fill_height = fill_volume / dxsa

    # Split up csv columns
    t = datf['t']
    st = t - t[0]
    dr = datf['dr']
    cr = datf['cr']
    pv = datf['pv']

    # Filtering: aye or naw?
    if True:
        dr = np.array(filter(st, dr, method="butter", A=2, B=0.001))
        cr = np.array(filter(st, cr, method="butter", A=2, B=0.001))

    cr = filter(st, cr, method="gaussian", A=100, B=100)
    cr = filter(st, cr, method="butter", A=2, B=0.0001)

    # Calculate torque
    mus = [visc] * len(cr)
    sp_rpms = dr * 316.451 - 163.091
    sp_rads = (sp_rpms * 2 * np.pi) / 60
    sn_rpms = 5.13 * pv + 15.275
    gam_dot = (sp_rads * ri) / (ro - ri)
    tau = mus * gam_dot
    T = tau * (2 * np.pi * ri * ri * fill_height)
    Ts = T / (1.0 - (sp_rpms / sn_rpms))
    #    cu      = (-956.06 * (cr ** 3)) + (6543.97 * (cr ** 2)) + (-14924.369 * cr) + 11341.612
    cu = (25.177 * cr) - 45.264
    vo = 0.0636 * pv + 2.423
    pe = cu * vo
    return st, mus, sp_rpms, sp_rads, gam_dot, tau, T, cu, vo, pe, Ts, sn_rpms, pv
Code Example #29
def call_filter(filename):
	# data = filter.filter(sys.argv[1])
	data = filter.filter(filename)

	# Write to output.csv
	with open(os.path.join(os.path.expanduser('~'),'Documents/Design-Project/analysis/OM',os.path.splitext(filename)[0] + '.csv'), "w") as f:
		writer = csv.writer(f)
		writer.writerows(data)
Code Example #30
def process_issues(features):
    issues = gitable.launchDump()
    weekly_issues = issues['week']
    del issues['week']

    author_issues = dict()
    events_issues =dict()
    comments_issues = dict()
    for issue, events in issues.iteritems():
        #print("ISSUE " + str(issue))


        dict_add(author_issues, events[0].user)

        dict_add(comments_issues, events[0].comments)

        dict_add(events_issues, len(events[1:]))


    draw_bar(author_issues.values(), "issues number posted by person","issues", range(len(author_issues.keys())),"person",0.35)

    author_filter = filter.filter(author_issues)
    large_author = author_filter.large()
    features['issues_person'] = author_filter
    if(len(large_author) >0 ):
        features['large issues post by single user'] = large_author

    small_author = author_filter.small(delta=2)
    if(len(small_author) >0 ):
        features['small issues post by single user'] = small_author

    draw_bar(comments_issues.values(),"issues number with same comments number","issues",comments_issues.keys(),"comments number",0.35)

    events_filter= filter.filter(events_issues)
    features['events_issues'] = events_filter
    if len(events_filter.large())>0:
        features['large issues with same events'] = events_filter.large()

    comments_filter = filter.filter(comments_issues)
    features['comments_issues'] = comments_filter
    large_comments = comments_filter.large()
    if(len(large_comments)>0):
        features['large issues with same comments'] = large_comments
    draw_bar(events_issues.values(),"issues number with same events number", "issues",events_issues.keys(),"events number",0.35)
    single_user = filter.filter(author_issues)
    single_user.large(5)
Code Example #31
File: overlap.py  Project: pwang724/PHD_experiment
def _get_overlap_odor(res, delete_non_selective):
    def _subsets(S, m):
        return set(itertools.combinations(S, m))

    new = defaultdict(list)
    mice = np.unique(res['mouse'])
    for mouse in mice:
        mouse_res = filter.filter(res, filter_dict={'mouse': mouse})
        days = np.unique(mouse_res['day'])
        for day in days:
            mouse_day_res = filter.filter(mouse_res,
                                          filter_dict={
                                              'day': day,
                                              'odor_valence': ['CS+', 'CS-']
                                          })

            odors, odor_ix = np.unique(mouse_day_res['odor_standard'],
                                       return_index=True)
            assert len(odor_ix) == 4, 'Number of odors does not equal 4'
            all_comparisons = _subsets(odor_ix, 2)
            for comparison in all_comparisons:
                mask1 = mouse_day_res['sig'][comparison[0]]
                mask2 = mouse_day_res['sig'][comparison[1]]

                if delete_non_selective:
                    non_selective_mask = _respond_to_all(mouse_day_res['sig'])
                    mask1 = np.all(
                        [mask1, np.invert(non_selective_mask)],
                        axis=0).astype(int)
                    mask2 = np.all(
                        [mask2, np.invert(non_selective_mask)],
                        axis=0).astype(int)

                overlap = _overlap(np.where(mask1)[0], np.where(mask2)[0])
                new['Overlap'].append(overlap)
                new['mouse'].append(mouse)
                new['day'].append(day)
                if comparison == (0, 1):
                    new['condition'].append('+:+')
                elif comparison == (2, 3):
                    new['condition'].append('-:-')
                else:
                    new['condition'].append('+:-')
    for key, val in new.items():
        new[key] = np.array(val)
    return new
Code Example #32
def display_data():
    income = request.form.get('income', 'any')
    prof = request.form.get('profession', 'any')
    church = request.form.get('church', 'Total_churches')
    safety = request.form.get('safety', 'any')
    data = filter.filter(income, prof, church, safety)
    return render_template('table.html',
                           data=data,
                           logged_in=('logged_in' in session))
Code Example #33
def _filter(res):
    out = defaultdict(list)
    for mouse in np.unique(res['mouse']):
        temp = filter.filter(res, {'mouse': mouse})
        data = temp['ball_data'].flatten()
        max, min = np.max(data), np.min(data)
        if (max - min) > 4:
            reduce.chain_defaultdicts(out, temp)
    return out
Code Example #34
File: responsive.py  Project: pwang724/PHD_experiment
def plot_summary_water(res, start_days, end_days, figure_path):
    ax_args_copy = ax_args.copy()
    res = copy.copy(res)
    get_responsive_cells(res)
    list_of_days = list(zip(start_days, end_days))
    mice = np.unique(res['mouse'])
    start_end_day_res = filter.filter_days_per_mouse(
        res, days_per_mouse=list_of_days)
    add_naive_learned(start_end_day_res, start_days, end_days, 'a', 'b')
    odor_list = ['US']
    colors = ['Turquoise']
    ax_args_copy.update({'xlim': [-1, 2]})
    for i, odor in enumerate(odor_list):
        plot.plot_results(start_end_day_res,
                          select_dict={'odor_standard': odor},
                          x_key='training_day',
                          y_key='Fraction Responsive',
                          loop_keys='mouse',
                          colors=[colors[i]] * len(mice),
                          path=figure_path,
                          plot_args=line_args,
                          ax_args=ax_args_copy,
                          fig_size=(1.6, 1.5),
                          legend=False)

    before_csm = filter.filter(start_end_day_res,
                               filter_dict={
                                   'training_day': 'a',
                                   'odor_standard': 'US'
                               })
    after_csm = filter.filter(start_end_day_res,
                              filter_dict={
                                  'training_day': 'b',
                                  'odor_standard': 'US'
                              })

    from scipy.stats import ranksums, wilcoxon, kruskal
    print('Before PT CS+: {}'.format(np.mean(
        before_csm['Fraction Responsive'])))
    print('After PT CS+: {}'.format(np.mean(after_csm['Fraction Responsive'])))
    print('Wilcoxin:{}'.format(
        wilcoxon(before_csm['Fraction Responsive'],
                 after_csm['Fraction Responsive'])))
Code Example #35
def enricher(id):
    query = FlickrQuery(gconfig.flickrAPI,gconfig.flickrSecret)
    event = eventinfo(id)
    logger = logfile.logger(gconfig.logdir + '/%s.txt' % event.id)
    logger.info('query event information')
    if not event.succ:
        logger.info( "can not find such event" )
        return
    logger.info('query photos with machine tag')
    idlist = query.searchbyid(event.id)
    db = Download(gconfig.tmpdir + '/%s' % event.id)
    db.download(idlist)
    #query.outputlist(idlist, event.id, 'list/idlist_%s.txt' % event.id)
    logger.info('query photos with text info')
    titlelist = query.searchbytitle(event.title,event.stime,event.id)
    db.download(titlelist)
    #query.outputlist(titlelist,event.id, 'list/titlelist_%s.txt' % event.id)
    logger.info('query photos with geo info')
    geolist = query.searchbygeo(event.lat,event.lng,event.stime,event.id)
    db.download(geolist)
    #query.outputlist(geolist,event.id,'list/geolist_%s.txt' % event.id)

    logger.info('parsing features')    
    feature = getfeature()
    feature.run(gconfig.tmpdir + '/%s' % event.id)
    
    #trainfile = 'list/idlist_%s.txt' % event.id
    trainlist = []
    for url in idlist:
        fname = url.split('/')[-1]
        fname = gconfig.tmpdir + '/%s' % event.id + '/' + fname
        trainlist.append(fname.replace('.jpg','_ch.txt'))
        
    testlist = []
    for url in titlelist:
        fname = url.split('/')[-1]
        fname = gconfig.tmpdir + '/%s' % event.id + '/' + fname
        testlist.append(fname.replace('.jpg','_ch.txt'))

    logger.info('visual pruning')        
    myfilter = filter(trainlist,testlist)
    r = myfilter.filter()
    lst = []
    for idx in r:
        lst.append(testlist[idx])

    logger.info('refining')        
    myrefine = refine(event.id,lst)
    results = myrefine.refine()
    newresults = query.geturlbyid(results,titlelist)

    logger.info('output result')    
    query.OutputXML(event.id,idlist,titlelist,geolist,newresults)
    query.OutputHtml(event.id,idlist,titlelist,geolist,newresults)
    logger.info('event-finished')    
Code Example #36
File: lfp.py  Project: neuropy/neuropy
    def filter(self, chanis=None, f0=0, f1=7, fr=0.5, gpass=0.01, gstop=30, ftype='ellip'):
        """Bandpass filter data on row indices chanis, between f0 and f1 (Hz), with filter
        rolloff (?) fr (Hz). Done in-place.

        ftype: 'ellip', 'butter', 'cheby1', 'cheby2', 'bessel'
        """
        data = self.get_data()
        if chanis == None:
            chanis = np.arange(len(data))
        data = data[chanis]
        data, b, a = filter.filter(data, self.sampfreq, f0, f1, fr, gpass, gstop, ftype)
        self.data[chanis] = data
        return b, a
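The module-level filter.filter called here is not shown; going only by the docstring (passband f0 to f1 in Hz, rolloff fr in Hz, gpass/gstop in dB, selectable ftype), an equivalent could be sketched with scipy.signal as below. This is an assumption about the implementation, not a copy of it, and it assumes a strictly positive passband (the real code presumably falls back to a lowpass design when f0 == 0).

import scipy.signal

def bandpass_sketch(data, sampfreq, f0, f1, fr, gpass, gstop, ftype='ellip'):
    # design an IIR bandpass with passband [f0, f1] and transition width fr (all in Hz)
    nyq = sampfreq / 2.0
    wp = [f0 / nyq, f1 / nyq]                  # passband edges, normalized
    ws = [(f0 - fr) / nyq, (f1 + fr) / nyq]    # stopband edges, normalized
    b, a = scipy.signal.iirdesign(wp, ws, gpass=gpass, gstop=gstop, ftype=ftype)
    # zero-phase filtering along the time axis of each selected channel
    return scipy.signal.filtfilt(b, a, data), b, a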
Code Example #37
File: test_process.py  Project: demon108/stream
def process():
    conn_old = mysql.connect('bsppr', '192.168.241.7')
    mysql.insert(conn_old,'set names utf8')
    conn_new = mysql.connect('bsppr', '192.168.241.32')
    mysql.insert(conn_new,'set names utf8')
    cinfos_old = get_cinfos(conn_old)
    #print cinfos_old
    cinfos_new = get_cinfos(conn_new)
    mongo_conn = get_mongo_conn()
    tablename = 'weixin'
    tmpdatas = mongo.find(mongo_conn, tablename, {},50)
    rawdatas = []
    for raw in tmpdatas:
        date = raw['pubtime']
        now = datetime.datetime.now()
        diff = now - date
        print diff.days
        rawdatas.append(raw)
    if len(rawdatas)==0:
        time.sleep(10)
    raw_old_qualified = filter(cinfos_old,rawdatas)
    old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified,'old')
    raw_new_qualified = filter(cinfos_new,rawdatas)
    new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified,'new')
Code Example #38
File: parse.py  Project: baipenghan/Email_Foldering
def get_msg_info(root_path):
    user_array = []

    for user in os.listdir(root_path):
        if user == "farmer-d":
            user_path = os.path.join(root_path, user)
            msg_array = get_msg_info_user(user_path)

            sorted_msg_array = sorted(msg_array, cmp=compare)

            for msg in sorted_msg_array:
                msg.dict_info = filter.filter(msg.header_info + msg.body_info)
            user_array.append(msg_array)

    return user_array
Code Example #39
File: geturl.py  Project: alioxp/blogmark
	def load(self,name):
		if os.path.exists(name) == False:
			return
		fobj = open(name,'rU')
		index = 0;
		key = '';
		value = '';
		for eachLine in fobj:
			if index == 0:
				key = filter(name,eachLine[:-1])
				index = 1
			else:
				#value = eachLine;
				self.allLink[key] = eachLine[:-1] 
				index = 0				
		fobj.close()
Code Example #40
File: reader.py  Project: hunse/nengo_1.4
 def get(self,name,time=None,filter=None,normalize=False,keys=None):
     if name not in self.cache:
         data=[]
         done_header=False
         index=self.header.index(name)
         with open(os.path.join(self.dir,self.filename)) as f:
             for row in f:
                 row=row.strip()
                 if len(row)==0 or row.startswith('#'): 
                     continue
                 if not done_header:
                     done_header=True
                     continue
                 data.append(self.parse(row.split(',')[index]))
         data=np.array(data)        
         self.cache[name]=data
     else:
         data=self.cache[name]    
     if keys is not None:
         data2=np.zeros((len(data),len(keys)),dtype=float)            
         for i,d in enumerate(data):
             scale=1.0
             if normalize:
                 length=np.sqrt(sum([v*v for v in d.values()]))
                 if length>0.01: scale=1.0/length
             for j,key in enumerate(keys):
                 data2[i][j]=d.get(key,0)*scale
         data=data2
         normalize=False
                 
     if filter is not None:
         data=filter.filter(data,self.time[1]-self.time[0],tau=filter)            
     if time is not None:
         if isinstance(time,(float,int)):
             data=data[self.get_index_for_time(time)]
         else:
             data=data[self.get_index_for_time(time[0]):self.get_index_for_time(time[1])]        
     if normalize:
         for i,v in enumerate(data):
             length=np.linalg.norm(v)
             if length>0.1:
                 data[i]/=length
             
     return data
Code Example #41
File: feedgotv.py  Project: duyvk/GTVRec
def bulk_store_clip_vector(vecs, start_id=0):
    """
    """
    if isinstance(vecs, list):
#        clips = []
        #id = get_highest_pk(ClipVector)
        id = start_id
        for vec in vecs:
            if isinstance(vec, dict) and filter(vec, 'clip'):
                if settings.VECTOR_ON_MONGO:
                    clip1 = vec.pop('clip1st').clipID
                    clip2 = vec.pop('clip2nd').clipID
                    clip_vec = ClipVector2(clip1st=clip1, clip2nd=clip2, **vec)
                else:
                    id += 1
                    clip_vec = ClipVector(pkey=id, **vec)
#                    clips.append(clip_vec)
                if CLIPS_QUEUE.full():
                    print 'queue full, freeze'
                    time.sleep(0.5)
                CLIPS_QUEUE.put(clip_vec)
        return True
#        if clips:
#            n_cluster = len(clips) / int(BULK_INSERT_ITEMS) + 1
#            for i in range(n_cluster):
#                offset = i*BULK_INSERT_ITEMS
#                end = offset + BULK_INSERT_ITEMS
#                cluster = clips[offset:end]
#                try:
#                    if cluster:
#                        ClipVector.objects.bulk_create(cluster)
#                except DatabaseError as dbe:
#                    print 'debug [index=%d, n_clusters=%d, n_records=%d, cluster_size=%d]' % \
#                          (i, n_cluster, len(clips), BULK_INSERT_ITEMS)
#                    raise dbe
#            return True
    return False
Code Example #42
File: feedgotv.py  Project: duyvk/GTVRec
def bulk_store_movie_vector(vecs, start_id=0):
    """
    """
    if isinstance(vecs, list):
#        movies = []
        #id = get_highest_pk(MovieVector)
        id = start_id
        for vec in vecs:
            if isinstance(vec, dict) and filter(vec, 'movie'):
                if settings.VECTOR_ON_MONGO:
                    movie1 = vec.pop('movie1st').movieID
                    movie2 = vec.pop('movie2nd').movieID
                    movie_vec = MovieVector2(movie1st=movie1, movie2nd=movie2, **vec)
                else:
                    id += 1
                    movie_vec = MovieVector(pkey=id, **vec)
#                    movies.append(movie_vec)
                if MOVIES_QUEUE.full():
                    print 'queue full, freeze'
                    time.sleep(0.5)
                MOVIES_QUEUE.put(movie_vec)
        return True
#        if movies:
#            n_cluster = len(movies) / int(BULK_INSERT_ITEMS) + 1
#            for i in range(n_cluster):
#                offset = i*BULK_INSERT_ITEMS
#                end = offset + BULK_INSERT_ITEMS
#                cluster = movies[offset:end]
#                try:
#                    if cluster:
#                        MovieVector.objects.bulk_create(cluster)
#                except DatabaseError as dbe:
#                    print 'debug [index=%d, n_clusters=%d, n_records=%d, cluster_size=%d]' % \
#                          (i, n_cluster, len(movies), BULK_INSERT_ITEMS)
#                    raise dbe
#            return True
    return False
Code Example #43
File: precheck.py  Project: harveyaot/recruitment
 def __init__(self):
     self.filter = filter()
     self.db_helper = DBHelper()
     self.cmp_table = 'refined_list_info'
     self.table = 'extracted_info'
     self.cmp_clms = [COMPANY_NAME,MEETING_TIME,MEETING_LOCATION,ORIGIN_URL,RELEASE_DATE,RECRUIT_TITLE]
Code Example #44
File: reader.py  Project: Elhamahm/nengo_1.4
    def get(self,name,time=None,filter=None,normalize=False,keys=None):
        """
        Return a column of data from the csv

        Parameters:
        WRITEME
        """
        if name not in self.cache:
            data=[]
            done_header=False
            index=self.header.index(name)
            with open(os.path.join(self.dir,self.filename)) as f:
                for row in f:
                    row=row.strip()
                    if len(row)==0 or row.startswith('#'): 
                        continue
                    if not done_header:
                        done_header=True
                        continue
                    data.append(self.parse(row.split(',')[index]))
            data=np.array(data)        
            self.cache[name]=data
        else:
            data=self.cache[name]    
        # one of the types of data in the csv file is a *string* of the form
        # "8a;9b;<...>"
        # This string represent a vector (semantic pointer) in terms of
        # a projections onto named [non-orthogonalized] basis elements.
        # The numeric prefixes are the inner products, and the character suffixes
        # name the basis elements.
        #
        # if `keys` is specified, then it means to only pay attention to the
        # explicitly named suffix *keys*. Otherwise all of them are returned.
        # 
        if keys is not None:
            data2=np.zeros((len(data),len(keys)),dtype=float)            
            for i,d in enumerate(data):
                scale=1.0
                if normalize:
                    length=np.sqrt(sum([v*v for v in d.values()]))
                    if length>0.01: scale=1.0/length
                for j,key in enumerate(keys):
                    data2[i][j]=d.get(key,0)*scale
            data=data2
            # -- normalize has already been done in the previous loop
            normalize=False
                    
        if filter is not None:
            data=filter.filter(data,self.time[1]-self.time[0],tau=filter)            
        if time is not None:
            if isinstance(time,(float,int)):
                data=data[self.get_index_for_time(time)]
            else:
                data=data[self.get_index_for_time(time[0]):self.get_index_for_time(time[1])]        
        if normalize:
            for i,v in enumerate(data):
                length=np.linalg.norm(v)
                if length>0.1:
                    data[i]/=length
                
        return data
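The long comment above only loosely pins down the "8a;9b" cell format, and self.parse is defined elsewhere. As an illustration of how such a cell could be turned into the dict that the keys/normalize logic expects (the helper name and the exact token syntax are assumptions, not taken from the nengo source):

import re

def parse_semantic_pointer(cell):
    # "0.8a;0.9b" -> {'a': 0.8, 'b': 0.9}
    values = {}
    for token in cell.split(';'):
        m = re.match(r'([-+0-9.eE]+)(\w+)$', token.strip())
        if m:
            values[m.group(2)] = float(m.group(1))
    return values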
Code Example #45
File: filterApi.py  Project: youzan/YZSpamFilter
fstop = open(stopwords_file)
totalStop = fstop.readlines()
fstop.close()
stops = []
for s in totalStop:
    s = s.strip()
    stops.append(s)

threshold = configs["threshold"]
classify_model = configs["classify_model"]
if not os.path.exists(classify_model):
    print "ERROR: you should have a filter model"
    exit(-1)

with open(classify_model, "rb") as file:
    f = filter(Algorithm)
    t = pickle.load(file)
    f.Algorithm.loadmodel(t)
    file.close()


class SpamFilter(Resource):

    """
      Spam-message filtering service

    """

    def get(self):
        if "query" not in request.args:
            abort(404, message="parameter `query` doestn't exist")
Code Example #46
File: geturl.py  Project: alioxp/blogmark
	def handle_data(self, data):
		if self.flg==1:
			self.result.insertLink(filter(self.convertedUrl ,data.strip()),self.link)
			self.flg=0	# reset the flag for the next iteration
Code Example #47
File: main.py  Project: thomas-marcoux/assignments
import observations
import filter

r = filter.filter(observations.horses)
print(r)
Code Example #48
File: datacheck.py  Project: harveyaot/recruitment
 def __init__(self):
     self.filter = filter()
Code Example #49
File: spider.py  Project: cfhb/Python_Codes
        strlist = re.split('\"',content)
        urlset = set([])
        for strstr in strlist:
            # to match a backslash with a Python regex, it has to be written as \\\\
            #if re.match('http://.*com(/|\w)+', str):
            # this regex is fairly crude and only matches the current site
            #if re.match('http://'+domain, str):
            rules="http://"+domain+"[^,^ ^  ^']*"
            # strstr is a unicode object
            result=re.compile(rules).findall(strstr.encode("utf-8"))
            # result is a list
            if len(result)==0:
                pass
            else:
                for i in result:
                    urlset.add(i)
        return list(urlset)
if __name__=="__main__":
    if len(sys.argv)!=3:
        print "usage:"+sys.argv[0]+" http://test.com/"+" depth"
        print "example:"+sys.argv[0]+" http://127.0.0.1/a.php?c=1"+" 3"
    else:
        domain=sys.argv[1].split('/')[2]
        # save the initial url
        tmp=[]
        tmp.insert(0,sys.argv[1]);
        saveurl(tmp)
        # start crawling
        main(sys.argv[1],0)
        filter.filter()
Code Example #50
File: views.py  Project: zljiljana/TaggerNews
def render_filter():
    userTag = request.args.get('ID')
    filter_snippet = filter(userTag)
    return render_template("output.html", snippet = filter_snippet)
Code Example #51
def QueryFinalPhotos(id):
    ffname = gconfig.metadatadir + '/%s/final.json' % id
    if os.path.exists(ffname):
        return ReadData(ffname)
    else:
        WaitAll(id)
        downloadAll(id)
        
        fname = gconfig.metadatadir + '/%s/id.json' % id
        if not os.path.exists(fname):
            QueryPhotobyId(id)
        
        tmp = ReadData(fname)
        idlist = [t['photo'] for t in tmp['photos']]

        event = LoadEventInfo('http://data.linkedevents.org/event/' + id)
        tmp = event['stime'].split('T')[0]
        stime = datetime.strptime(tmp,'%Y-%m-%d')
        
        fname = gconfig.metadatadir + '/%s/title.json' % id
        if not os.path.exists(fname):
            QueryPhotobyTitle(id,event['title'],stime)

        tmp = ReadData(fname)
        titlelist = [t['photo'] for t in tmp['photos']]
        print "titlelist", len(titlelist)

        fname = gconfig.metadatadir + '/%s/geo.json' % id
        if not os.path.exists(fname):
            QueryPhotobyGeo(event['id'],(event['lat'],event['lng']),stime)
            
        tmp = ReadData(fname)
        geolist = [t['photo'] for t in tmp['photos']]
        
        trainlist = []
        for url in idlist:
            fname = url.split('/')[-1]
            fname = gconfig.tmpdir + '/%s' % id + '/' + fname
            trainlist.append(fname.replace('.jpg','_ch.txt'))
            
        testlist = []
        alldata = titlelist + geolist
        
        for url in alldata:
            fname = url.split('/')[-1]
            fname = gconfig.tmpdir + '/%s' % id + '/' + fname
            testlist.append(fname.replace('.jpg','_ch.txt'))
        
        feature = getfeature()
        feature.run(gconfig.tmpdir + '/%s' % id)
    
        myfilter = filter(trainlist,testlist)
        r = myfilter.filter()  #return the index in testing data
        lst = []
        for idx in r:
            lst.append(testlist[idx])
        print "the number of pruned is %d" % len(lst)
        myrefine = refine(id,lst)
        results = myrefine.refine()
        
        newresults = query.geturlbyid(results,alldata)
        ftable = query.OutputJson(idlist + newresults)
        
        strdata = json.dumps(ftable)
        jsonfile = open(ffname,'w')
        jsonfile.write(strdata)
        jsonfile.close()
        return ftable