コード例 #1
0
ファイル: queryKeywordRank.py プロジェクト: eox03y/works
def do_many_days_s3_APPS(argv):
	"""Run the APPS keyword analysis over a range of days of S3 query logs.

	argv is a command-line style list: argv[1] and argv[2] are the first and
	last day and are passed unchanged to handolUtil.get_day_list. For every
	day in the returned list, each file named in QUERY_FILES is loaded through
	analyzer.querylog_from_s3("sch-emr", ...) into one shared LogAnalyzer;
	the statistics are then computed and written out. The time reported by
	the stopwatch is printed after every load and after the statistics step.

	When fewer than two day arguments are supplied, a usage message is
	printed and the process exits with status 1.
	"""
	if len(argv) < 3:
		print("usage: start_day end_day")
		print("usage: 2011-10-7 2011-10-13")
		# A usage error is a failure; exit non-zero so calling scripts notice.
		sys.exit(1)

	QUERY_FILES = [
		'euospcomp07.osp_query.log',
		'euospcomp08.osp_query.log',
		'euospsch01.osp_query.log',
		'euospsch03.osp_query.log',
		'euospsch01.2.osp_query.log',
		'euospsch03.2.osp_query.log',
	]
	# The day appears twice in the key: as a directory and as a file suffix.
	S3_KEY_FORMAT = "DEVELOPING/app/7nmc1m75ij/apps-log/query_log/%s/%s.%s"

	daylist = handolUtil.get_day_list(argv[1], argv[2])
	analyzer = LogAnalyzer("APPS", "%s-%s" % (daylist[0], daylist[-1]))
	stopwatch = handolUtil.StopWatch()
	for daystr in daylist:
		for query_file in QUERY_FILES:
			s3_file = S3_KEY_FORMAT % (daystr, query_file, daystr)
			analyzer.querylog_from_s3("sch-emr", s3_file)
			print("Loading & ETL: %f sec" % (stopwatch.laptime()))

	analyzer.get_stats()
	print("Calc Stats: %f sec" % (stopwatch.laptime()))
	analyzer.write_info()
コード例 #2
0
ファイル: matchQueryClickLog.py プロジェクト: eox03y/works
    def get_stats_from_qcmatched(self, fname):
        """ calculate statistics from Query/Click matched log
		"""
        stopwatch = handolUtil.StopWatch()
        stopwatch.start()
        qcmatched_list = QcLogAnalyzer.loadfile_qc_matched(fname)
        print "Loading : %f sec" % (stopwatch.laptime())
        stopwatch.start()

        for v in qcmatched_list:
            im = v[0]
            num_clicks = int(v[1])
            click_pos = int(v[2])
            tot = int(v[3])
            devtype = v[4]
            dos = v[5]
            dm = v[6]
            shop = v[7]
            keyword = v[8]
            if tot == 0: nores = 1
            else: nores = 0
            if num_clicks > 0:
                anyclick = 1
            else:
                anyclick = 0

            if is_non_ascii(keyword):
                english = 0
            else:
                english = 1

            os_shop = "%s_%s" % (dos, shop)
            shop_keyw = (shop, keyword)
            self.langD.add(english,
                           [1, anyclick, nores, click_pos, num_clicks])
            self.imD.add(im,
                         [1, anyclick, nores, click_pos, num_clicks, english
                          ])  # query count, click
            self.keywordD.add(
                keyword, [1, anyclick, nores, click_pos, num_clicks, english])
            self.shopkeywordD.add(shop_keyw,
                                  [1, anyclick, nores, click_pos, num_clicks])
            self.shopD.add(
                shop, [1, anyclick, nores, click_pos, num_clicks, english])
            self.osshopD.add(
                os_shop, [1, anyclick, nores, click_pos, num_clicks, english])
            self.devtypeD.add(
                devtype, [1, anyclick, nores, click_pos, num_clicks, english])
            self.dosD.add(dos,
                          [1, anyclick, nores, click_pos, num_clicks, english])
            self.dmD.add(dm,
                         [1, anyclick, nores, click_pos, num_clicks, english])
            #self.diffsecD.add(diffsec, [1, anyclick, nores, click_pos, len(clicklist)])

            if tot == 0:
                self.zeroD.add(keyword, [1, english])
                #self.zeroshopD.add(shop, [1])
                #self.zeroshopkeywordD.add(shop_keyw, [1])

        print "Calculating : %f sec" % (stopwatch.laptime())
コード例 #3
0
ファイル: queryKeywordRank.py プロジェクト: eox03y/works
def test_w_local_LHRHVH():
	"""Run the LHRHVH analysis end to end on one local query-log file.

	Loads "osp_query.log.20130306" into a LogAnalyzer, computes the
	statistics and writes them out, printing the time reported by the
	stopwatch after the load step and after the statistics step.
	"""
	log_analyzer = LogAnalyzer("LHRHVH", "any")
	timer = handolUtil.StopWatch()

	log_analyzer.querylog_from_file("osp_query.log.20130306")
	load_time = timer.laptime()
	print("Loading & ETL: %f sec" % (load_time))

	log_analyzer.get_stats()
	stats_time = timer.laptime()
	print("Calc Stats: %f sec" % (stats_time))

	log_analyzer.write_info()
コード例 #4
0
ファイル: queryKeywordRank.py プロジェクト: eox03y/works
def test_w_local_APPS():
	"""Run the APPS analysis end to end on one local query-log file.

	Loads "euospsch03.2.osp_query.log.20130215" into a LogAnalyzer, computes
	the statistics and writes them out, printing the time reported by the
	stopwatch after the load step and after the statistics step.
	"""
	log_analyzer = LogAnalyzer("APPS", "any")
	timer = handolUtil.StopWatch()

	log_analyzer.querylog_from_file("euospsch03.2.osp_query.log.20130215")
	load_time = timer.laptime()
	print("Loading & ETL: %f sec" % (load_time))

	log_analyzer.get_stats()
	stats_time = timer.laptime()
	print("Calc Stats: %f sec" % (stats_time))

	log_analyzer.write_info()
コード例 #5
0
    def main(self, bucket, cfiles, qfiles, outfile):
        stopwatch = handolUtil.StopWatch()
        for cfile in cfiles:
            self.add_click_file(cfile, bucket)
            print "Loading Click - %s: %f sec" % (cfile, stopwatch.laptime())

        qclistAll = []
        for qfile in qfiles:
            qclist = self.add_query_file(qfile, bucket)
            qclistAll += qclist
            print "Load Query & Match - %s: %f sec" % (qfile,
                                                       stopwatch.laptime())

        QueryClickMatcher.save_qc_match(qclistAll, outfile)
コード例 #6
0
ファイル: matchQueryClickLog.py プロジェクト: eox03y/works
def load_qcmatched(org_log_dir, daystr):
    os.chdir(org_log_dir)
    print "DIR:", os.getcwd()
    print "DAY:", daystr

    stopwatch = handolUtil.StopWatch()
    stopwatch.start()
    analyzer = QcLogAnalyzer(daystr)
    QCLOG = "qc_matched.all.%s.log" % daystr
    analyzer.get_stats_from_qcmatched(QCLOG)
    print "Loading & Statistics: %f sec" % (stopwatch.laptime())

    stopwatch.start()
    analyzer.print_stats('%s.all.csv' % (daystr))
    print "Saving output: %f sec" % (stopwatch.laptime())
コード例 #7
0
ファイル: queryKeywordRank.py プロジェクト: eox03y/works
def do_many_days_s3_LHRHVH(argv):
	"""Run the LHRHVH keyword analysis over a range of days of S3 query logs.

	argv is a command-line style list: argv[1] and argv[2] are the first and
	last day and are passed unchanged to handolUtil.get_day_list. For every
	day in the returned list, the query log under "hubs1-log" and then the
	one under "hubs2-log" is loaded through
	analyzer.querylog_from_s3("sch-emr", ...) into one shared LogAnalyzer;
	the statistics are then computed and written out. The time reported by
	the stopwatch is printed after every load and after the statistics step.

	When fewer than two day arguments are supplied, a usage message is
	printed and the process exits with status 1.
	"""
	if len(argv) < 3:
		print("usage: start_day end_day")
		print("usage: 2011-10-7 2011-10-13")
		# A usage error is a failure; exit non-zero so calling scripts notice.
		sys.exit(1)

	# Log directories to read for each day, in load order.
	HUB_LOG_DIRS = ["hubs1-log", "hubs2-log"]
	S3_KEY_FORMAT = "DEVELOPING/app/7nmc1m75ij/%s/query_log/osp_query.log.%s"

	daylist = handolUtil.get_day_list(argv[1], argv[2])
	analyzer = LogAnalyzer("LHRHVH", "%s-%s" % (daylist[0], daylist[-1]))
	stopwatch = handolUtil.StopWatch()
	for daystr in daylist:
		for hub_dir in HUB_LOG_DIRS:
			s3_file = S3_KEY_FORMAT % (hub_dir, daystr)
			analyzer.querylog_from_s3("sch-emr", s3_file)
			print("Loading & ETL: %f sec" % (stopwatch.laptime()))

	analyzer.get_stats()
	print("Calc Stats: %f sec" % (stopwatch.laptime()))
	analyzer.write_info()
コード例 #8
0
ファイル: misspellCorrection.py プロジェクト: eox03y/works
def load_keywords_stat(fname, resfname, wikif):
    """
	"""
    watch = handolUtil.StopWatch()
    print "Loading ...", wikif
    wikiStat = {}
    fp = codecs.open(wikif, 'rb', encoding='utf-8')
    for line in fp:
        flds = line.split()
        w = flds[0].lower().replace('_', ' ')
        n = int(flds[1])
        if not wikiStat.has_key(w):
            wikiStat[w] = n

    fp.close()
    print "Loaded: %f" % (watch.laptime())
    """
	== keywords.stat
	3997563 270333  6410    facebook
	2399787 186241  45449   whatsapp
	1433213 71764   1458    whats app
	1315718 130860  2040    temple run
	1004372 163723  272     games
	947351  87185   19846   skype
	829801  316731  834     angry birds
	529552  64385   1416    fruit ninja
	"""
    print "Loading ...", fname
    highKeywords = DictDict()
    noresKeywords = []
    fp = codecs.open(fname, 'rb', encoding='utf-8')
    for line in fp:
        flds = line.split('\t')
        q = int(flds[0])
        c = int(flds[1])
        nores = int(flds[2])
        keyword = flds[3].strip()
        kflds = keyword.split()
        if len(kflds) > 2: continue
        noresratio = (nores * 100) / q

        if q > 10 and noresratio > 50:
            if not wikiStat.has_key(keyword):
                noresKeywords.append([keyword, q, noresratio])
        if q > 150 and noresratio < 4:
            highKeywords.insert(keyword, [q, noresratio])
    fp.close()
    print "Loaded: %f" % (watch.laptime())
    print "noresKeywords: %d" % (len(noresKeywords))
    print "highKeywords: %d" % (len(highKeywords.D))

    fp = codecs.open(resfname, 'wb', encoding='utf-8')
    for noresK in noresKeywords:
        nearest = highKeywords.searchNearest(noresK[0], noresK[1])
        if nearest:
            prnformat = u'\t'.join(
                map(lambda (d, x, y, z): "%d\t%s\t%d\t%d" % (d, x, y, z),
                    nearest))
            fp.write("%s\t%d\t%d\t%s\n" %
                     (noresK[0], noresK[1], noresK[2], prnformat))
    fp.close()
    print "Processed: %f" % (watch.laptime())
コード例 #9
0
ファイル: matchQueryClickLog.py プロジェクト: eox03y/works
    stopwatch = handolUtil.StopWatch()
    stopwatch.start()
    analyzer = QcLogAnalyzer(daystr)
    QCLOG = "qc_matched.all.%s.log" % daystr
    analyzer.get_stats_from_qcmatched(QCLOG)
    print "Loading & Statistics: %f sec" % (stopwatch.laptime())

    stopwatch.start()
    analyzer.print_stats('%s.all.csv' % (daystr))
    print "Saving output: %f sec" % (stopwatch.laptime())

    #analyzer.print_info('%s.csv' % (daystr))
    #analyzer.save_to_mongo()
    #analyzer.save_keywords()


# Script entry point: runs the matched-log statistics step (load_qcmatched)
# for one hard-coded log directory and day.
if __name__ == "__main__":
    # NOTE(review): load_qcmatched calls os.chdir/os.getcwd; if the file has
    # no top-level "import os", this import is what provides it -- confirm
    # before moving or removing it.
    import os
    import sys
    # Command-line driven variant, currently disabled in favour of the
    # hard-coded values below.
    #os.chdir(sys.argv[1])
    #daystr = sys.argv[2]
    org_log_dir = u"D:/0001/9 org logs/20130215"
    daystr = '20130215'
    # NOTE(review): this stopwatch is started but never read in this block.
    stopwatch = handolUtil.StopWatch()
    stopwatch.start()
    # The matching step is disabled; only the statistics step runs.
    #save_qcmatched(org_log_dir, daystr)
    load_qcmatched(org_log_dir, daystr)
    # Manual check of is_non_ascii on an ASCII and a non-ASCII literal; the
    # results are only printed, not asserted.
    # NOTE(review): the non-ASCII byte-string literal needs a source encoding
    # declaration under Python 2 -- confirm the file has one.
    print is_non_ascii('avcd')
    print is_non_ascii('微信')