Esempio n. 1
0
def main(mode: str):
    """
    Entry point: either pull fresh articles from RSS feeds or run the
    Textrazor analysis over the stored ones.

    :param mode:    One of 'fetch' or 'analyze'.
    :return:        Nothing; success/failure is reported via printed messages.
    """
    timestamp = datetime.now()

    # set up configuration and the database connection
    settings = Config()
    client = DatabaseClient(settings.database.host,
                            settings.database.database,
                            settings.database.user,
                            settings.database.password,
                            settings.database.port)

    # dispatch on the requested mode
    if mode not in ("fetch", "analyze"):
        raise ValueError("--mode must be one of 'fetch', 'analyze'.")
    if mode == "fetch":
        fetch(settings, client, timestamp.isoformat())
    else:
        analyze(settings, client, timestamp)

    # persist whatever the chosen handler changed
    client.connection.commit()
 def test_analyze(self):
     """analyze() reports count, mean and (optionally weighted) variance."""
     expectations = [
         (([],), {"avg": 0.0, "n": 0, "variance": 0.0}),
         (([3.0],), {"avg": 3.0, "n": 1, "variance": 0.0}),
         (([1.0, 2.0, 3.0, 4.0],), {"avg": 2.5, "n": 4, "variance": 5.0 / 3.0}),
         (([1.0, 2.0, 3.0, 4.0], linear_weights),
          {"avg": 2.0, "n": 4, "variance": 2.0}),
     ]
     for args, expected in expectations:
         self.assertEqual(analyze(*args), expected)
Esempio n. 3
0
def main():
    """
    Executable entry point.

    Loads the parsed timing data, then checks the times and buckets them
    into intervals via analyze().
    """
    # load parsed data, then hand it off for interval analysis
    parsed_data, driver_hash = getData()
    analyze(parsed_data, driver_hash)
Esempio n. 4
0
def processApps(args, gpapi, apps):
	""" Download and analyze apps on the Google Play Store.
	
	Arguments:
	args       -- the command line arguments object
	gpapi      -- the Google Play API object
	apps       -- dictionary of apps and their meta data
	"""
	createAppFolder(args)
	i = 0
	j = 0
	
	for app in apps:
		if shouldProcess(apps[app]):
			j += 1		
	print "found {:,} apps to process".format(j)
	
	pos = getRestorePoint(args)
	
	for app,meta in apps.iteritems():
			
		# we only care about apps which require INTERNET permission, we haven't checked yet, and are free
		if not shouldProcess(meta):
			continue
			
		# skip until at the position where we should resume
		i += 1
		if i < pos:
			continue
			
		# create restore point
		if i % args.restore_freq == 0 and i > 0 and args.restore_freq > 0:
			createRestorePoint(args, apps, i)
		
		# print progress
		sys.stdout.write("\rprocessing apps... %6.2f%% %10s: %s\033[K " % (100.0 * i / j, "app", app))
		sys.stdout.flush()
		
		try:
			fname = args.app_dir + app + ".apk"
			if download(gpapi, fname, app, meta['version'], meta['offer']):
				analyze(apps, fname, app)
				os.remove(fname)	
		except:
			None
	sys.stdout.write("\rdone processing apps\033[K\n")
	sys.stdout.flush()
			
	# clean up
	print "saving to cache"
	clearRestorePoint(args, apps)
	deleteAppFolder(args)
 def test_analyze(self):
     """Check analyze() on empty, singleton, plain and weighted inputs."""
     cases = (
         ([], None, {"avg": 0.0, "n": 0, "variance": 0.0}),
         ([3.0], None, {"avg": 3.0, "n": 1, "variance": 0.0}),
         ([1.0, 2.0, 3.0, 4.0], None,
          {"avg": 2.5, "n": 4, "variance": 5.0 / 3.0}),
         ([1.0, 2.0, 3.0, 4.0], linear_weights,
          {"avg": 2.0, "n": 4, "variance": 2.0}),
     )
     for values, weights, expected in cases:
         if weights is None:
             self.assertEqual(analyze(values), expected)
         else:
             self.assertEqual(analyze(values, weights), expected)
Esempio n. 6
0
 def get(self):
     """Render raw tweet dicts from tweetstore that match the analyzer's
     user/word/term filters, wrapped in a minimal HTML page.

     NOTE(review): the finduser/findword/findterm check only runs for
     tweets whose screen name came from tweet['user']; tweets with a
     top-level 'screen_name' are never written -- preserved as-is.
     """
     a = analyze()
     header = """
     <html>
     <head>
     <title>Green Tweets</Title>
     """
     self.write(header)
     # FIX: this closing-head/opening-body fragment was built at the end of
     # the method but never written, so the page's <head> was never closed
     # and <body> never opened.  Write it before the tweets.
     finhdr = """
     </head>
     <body style="background:lightgray">
     """
     self.write(finhdr)
     for tweet in tweetstore:
         screen_name = ""
         text = ""
         if a.cleanstr(tweet['text']) == '':
             #print "Skipping: " + tweet['text']
             pass
         else:
             if tweet.has_key('text'):
                 text = tweet['text'].encode('ascii', 'ignore')
                 text = convertLinks(text)
             if tweet.has_key('screen_name'):
                 screen_name = "@" + tweet['screen_name'].encode(
                     'ascii', 'ignore') + ": "
             elif tweet.has_key('user'):
                 if tweet['user'].has_key('screen_name'):
                     screen_name = "@" + tweet['user'][
                         'screen_name'].encode('ascii', 'ignore') + ": "
                 if a.finduser(screen_name) or (
                         a.findword(text)) or a.findterm(text):
                     self.write(tweet)
                     self.write("\n")
     self.write("</body></html>")
Esempio n. 7
0
 def get(self):
     """Render every tweet in rtstore as a <p> inside a self-refreshing page.

     Fixes vs. original: screen_name/text are reset for every tweet
     (previously a NameError if the first tweet lacked 'text' or a screen
     name, and stale values leaked between tweets), and the closing
     </body></html> is written once after the loop instead of after every
     single tweet.
     """
     a = analyze()
     header = """
     <html>
     <head>
     <title>Twitter Client</Title>
     <meta http-equiv="refresh" content="60" />
     </head>
     <body style="background:lightgray">
     """
     self.write(header)
     for tweet in rtstore:
         screen_name = ""
         text = ""
         if tweet.has_key('text'):
             text = tweet['text'].encode('ascii', 'ignore')
             text = convertLinks(text)
         if tweet.has_key('screen_name'):
             screen_name = "@" + tweet['screen_name'].encode(
                 'ascii', 'ignore') + ": "
         elif tweet.has_key('user'):
             if tweet['user'].has_key('screen_name'):
                 screen_name = "@" + tweet['user']['screen_name'].encode(
                     'ascii', 'ignore') + ": "
         self.write("<p>")
         self.write(screen_name)
         self.write(text)
         self.write("</p>")
     self.write("</body></html>")
Esempio n. 8
0
    def __init__(self, pcap_file="D:/Users/User/Documents/GitHub/gg/gggg.pcap"):
        """
        Build the application's main window: File/Help menus, a matplotlib
        canvas plotting the analysis of a capture file, and a status bar.

        :param pcap_file: path of the .pcap file to analyze.  Defaults to the
                          path the original hard-coded, so existing callers
                          keep working; pass a path to generalize.
        """
        QMainWindow.__init__(self)
        self.setAttribute(QtCore.Qt.WA_DeleteOnClose)
        self.setWindowTitle("application main window")

        # File menu with a Ctrl+Q quit shortcut
        self.file_menu = QMenu('&File', self)
        self.file_menu.addAction('&Quit', self.fileQuit,
                                 QtCore.Qt.CTRL + QtCore.Qt.Key_Q)
        self.menuBar().addMenu(self.file_menu)

        # Help menu, visually separated from File
        self.help_menu = QMenu('&Help', self)
        self.menuBar().addSeparator()
        self.menuBar().addMenu(self.help_menu)

        self.help_menu.addAction('&About', self.about)

        self.main_widget = QWidget(self)

        # analyze the capture, then plot the resulting dataframe
        df = analyze(pcap_file, self)
        ax = create_plot(df)

        layout = QVBoxLayout(self.main_widget)
        canvas = MyStaticMplCanvas(ax,
                                   self.main_widget,
                                   width=5,
                                   height=4,
                                   dpi=100)
        layout.addWidget(canvas)

        self.main_widget.setFocus()
        self.setCentralWidget(self.main_widget)

        self.statusBar().showMessage("All hail matplotlib!", 2000)
Esempio n. 9
0
def process(packets):
    """
    Reassemble JSON payloads that were split across raw packets, extract a
    token from them via analyze(), then rewrite every packet's payload,
    replacing the token's hex string with the incremented token's hex.

    :param packets: iterable of scapy-style packets (supports `Raw in packet`
                    and `packet[Raw].load`).
    """
    # collect Raw payloads plus the index of the packet each came from
    packets_to_analyze = []
    packets_to_analyze_index = []
    packet_index = 0
    for packet in packets:
        if Raw in packet:
            packets_to_analyze.append(packet[Raw].load)
            packets_to_analyze_index.append(packet_index)
        packet_index += 1
    strings_to_analyze = [''] * len(packets_to_analyze)
    index = -1
    for load in packets_to_analyze:
        # a leading control character (< 0x14) together with "json" in the
        # payload marks the first fragment of a new message; anything else
        # is treated as a continuation and appended to the current message
        first_char = ord(load[0])
        if first_char < 20 and load.find("json") != -1:
            index += 1
            strings_to_analyze[index] = load
        else:
            # NOTE(review): if the very first payload is NOT a message start,
            # index is still -1 and this writes into the LAST slot -- confirm
            # captures always begin with a message start.
            string = strings_to_analyze[index]
            string += load
            strings_to_analyze[index] = string
    # token stays -2 (sentinel) unless analyze() finds a positive one
    token = -2
    for load in strings_to_analyze:
        tmp_token = analyze(load)
        if tmp_token > 0:
            token = tmp_token
    if token > 0:
        for packet in packets:
            if Raw in packet:
                # hex(...)[2:len-1] strips "0x" and the final character --
                # presumably the Python 2 long 'L' suffix; under Python 3
                # this would drop the last hex digit.  TODO confirm py2-only.
                str_token = hex(token)[2:len(hex(token))-1]
                token += 1
                str_token_plus = hex(token)[2:len(hex(token))-1]
                packet[Raw].load = packet[Raw].load.replace(str_token, str_token_plus)
Esempio n. 10
0
 def get(self):
     """Render greenstore tweets (newest first) as styled HTML paragraphs
     with avatar, profile link, linkified text and timestamp.

     FIX: the profile-image lookup now checks that the tweet has a 'user'
     key first; the original raised KeyError on tweets without one.
     """
     a = analyze()
     header = """
     <html>
     <head>
     <title>Green Tweets</Title>
     </head>
     <body style="background:lightgray">
     """
     self.write(header)
     for tweet in reversed(greenstore):
         screen_name = ""
         text = ""
         if tweet.has_key('text'):
             text = tweet['text'].encode('ascii', 'ignore')
             text = convertLinks(text)
         if tweet.has_key('screen_name'):
             screen_name = "@" + tweet['screen_name'].encode('ascii', 'ignore') + ": "
         elif tweet.has_key('user'):
             if tweet['user'].has_key('screen_name'):
                 screen_name = "@" + tweet['user']['screen_name'].encode('ascii', 'ignore') + ": "
         img_url = ""
         # guard the 'user' access -- not every tweet dict carries it
         if tweet.has_key('user') and tweet['user'].has_key('profile_image_url'):
             img_url = tweet['user']['profile_image_url']
         self.write("<p style='background:white; color: green'>")
         if img_url != "":
             self.write("<img src='" + img_url + "' style='float:left'>")
         self.write("<a href='http://twitter.com/" + screen_name[1:-2] + "' target='_blank'>" + screen_name + "</a>")
         self.write("<br/>")
         self.write(text)
         if tweet.has_key('created_at'):
             self.write("<br/>" + tweet['created_at'].encode('ascii', 'ignore'))
         self.write("</p>")
     self.write("</body></html>")
def test_wordcount(get_text_mock, spark_context):
    """Word counting ranks 'foo' (2 hits) ahead of 'bar' (1 hit)."""
    get_text_mock.return_value = "foo bar foo"

    counts = analyze(spark_context)

    for position, expected in ((0, ('foo', 2)), (1, ('bar', 1))):
        assert counts[position] == expected
Esempio n. 12
0
 def analyze_file(self):
     """Ask the user for a capture file, analyze it, and enable the
     graph-creation action when the analysis produced a dataframe."""
     chosen = QtWidgets.QFileDialog.getOpenFileName(
         self.MainWindow,
         "Open a File",
         filter="Wireshark capture file (*.pcap;*.pcapng);;All Files (*.*)")
     # getOpenFileName returns (path, selected_filter); analyze the path
     self.df = analyze(chosen[0])
     if self.df is not None:
         self.ui.actionCreateGraph.setEnabled(True)
Esempio n. 13
0
def handle_messages():
    print "Handling Messages"
    payload = request.get_data()
    print payload
    for sender, message in messaging_events(payload):
        print "Incoming from %s: %s" % (sender, message)
        send_message(PAT, sender, analyze(message))
    return "ok"
Esempio n. 14
0
def test(filename, graph):
    """
    Run the SGD, SVC and MNB classifiers over the CSV data and optionally
    plot their metrics side by side.

    :param filename: CSV file understood by parse_csv().
    :param graph:    when truthy, draw the bar graph of all metrics.
    """
    # features is unused here but parse_csv returns it; keep the unpack
    data, features, labels = parse_csv(filename)

    # each analyze() result looks like:
    # [tn, fp, fn, tp, precision, recall, acc, fscore]
    total = [
        analyze(runSGD(data), labels),
        analyze(runSVC(data), labels),
        analyze(runMNB(data), labels),
    ]

    if graph:
        # transpose the per-classifier rows into per-metric columns instead
        # of eight hand-written, copy-pasted comprehensions
        bargraph(*[list(column) for column in zip(*total)])
Esempio n. 15
0
    def run(self):
        """
        Consumer thread: pop tweets off the shared queue and route them to
        the green/red stores and their websocket clients.

        Routing, in priority order:
          1. tweets from a preferred user -> greenstore + all ws clients
          2. retweets (text starts with 'RT') -> dropped
          3. link-bearing tweets from non-blocked users, without block terms
             and not all-caps -> green or red depending on isgreen()
        """
        print "Starting consumer thread"
        a = analyze()
        global queue
        global tweetstore
        global rtstore
        # hard-coded moderation lists: users/terms to suppress, users to prefer
        block = ["insafediver", "MakeUseOf", "healthyworld24", "ISSAboveYou", "JoeGumby1", "Gumbletech"]
        blockterms = ["invest", "market", "nasa", "untuk"]
        prefuser = ["davewiner", "scobleizer"]
        while True:
            #lock.acquire()
            condition.acquire()
            if not queue:
                # queue empty: block until the producer notifies
                #print "Nothing in queue: printer will try again"
                condition.wait()
                pass
            else:
                num = queue.pop()
                screen_name = ""
                text = ""
                if num.has_key('user'):
                    if num['user'].has_key('screen_name'):
                        screen_name = num['user']['screen_name'].encode('ascii', 'ignore')
                if num.has_key('text'):
                    text = num['text'].encode('ascii', 'ignore')
                if screen_name in prefuser:
#                    print screen_name, text
                    for client in wsckts:
                        wsckts[client]['object'].send_msg(num)
                    greenstore.append(num)
                    # if self.client:
                    #     self.db.tweets.insert_one(num)
                elif text.startswith('RT'):
                    #rtstore.append(num)
                    pass
                elif (text.find('http') != -1):
                    if (screen_name not in block):
                        # blck stays True unless a block term appears.
                        # NOTE(review): find(term) > 0 misses a block term at
                        # position 0 -- confirm whether that is intentional.
                        blck = True
                        for term in blockterms:
                            if text.lower().find(term) > 0:
                                blck = False

                        if (blck) and not (a.cleanstr(text).isupper()):
                            if isgreen(num):
                                for client in wsckts:
                                    wsckts[client]['object'].send_msg(num)
                                greenstore.append(num)
                                # if self.client:
                                #     self.db.tweets.insert_one(num)
                            else:
                                for client in rwsckts:
                                    rwsckts[client]['object'].send_msg(num)
                                tweetstore.append(num)
                                # if self.client:
                                #     self.db2.redtweets.insert_one(num)
            condition.release()
Esempio n. 16
0
def add_entry():
    """Persist a new text entry plus its pickled tone analysis, then
    redirect the user back to the analysis page."""
    db = get_db()
    text = request.form['text']
    analysis = analyze(text)
    # analysis[0] is the timestamp column value, analysis[1] the tones blob
    pickled_tones = cPickle.dumps(analysis[1], 1)
    db.execute('insert into entries (text, time, tones) values (?, ?, ?)',
               [text, analysis[0], str(pickled_tones)])
    db.commit()
    flash('New entry was successfully posted')
    return redirect(url_for('analyzeWeb'))
Esempio n. 17
0
 def save_and_analyze_file(self):
     """Ask for a target file, dump the sniffer's capture into it, analyze
     it, and enable graph creation when the analysis succeeds.

     FIX: the original called analyze() even when the user cancelled the
     save dialog (empty path); both the pcap write and the analysis now
     run only when a real file was chosen.
     """
     file_name = QtWidgets.QFileDialog.getSaveFileName(
         self.MainWindow,
         "Save into a File",
         filter="Wireshark capture file (*.pcap;*.pcapng);;All Files (*.*)")
     if file_name[0]:
         self.sniffer.write_into_pcap(file_path_name=file_name[0])
         self.df = analyze(file_name[0])
         if self.df is not None:
             self.ui.actionCreateGraph.setEnabled(True)
Esempio n. 18
0
def isgreen(tweet):
    """
    Return True when the tweet matches the analyzer's user, word or term
    filters; False otherwise.

    FIX: text and screen_name are initialized up front -- the original
    raised UnboundLocalError on tweets missing 'text' or any screen-name
    source.
    """
    text = ""
    screen_name = ""
    if tweet.has_key('text'):
        text = tweet['text'].encode('ascii', 'ignore')
        text = convertLinks(text)
    if tweet.has_key('screen_name'):
        screen_name = "@" + tweet['screen_name'].encode('ascii', 'ignore') + ": "
    elif tweet.has_key('user'):
        if tweet['user'].has_key('screen_name'):
            screen_name = "@" + tweet['user']['screen_name'].encode('ascii', 'ignore') + ": "
    a = analyze()
    # return the boolean directly instead of an if/else True/False ladder
    return bool(a.finduser(screen_name) or a.findword(text) or a.findterm(text))
Esempio n. 19
0
 def get(self):
     """Render tweetstore tweets that pass the analyzer's user/word/term
     filters as styled green HTML paragraphs, refreshing every 30 seconds.

     NOTE(review): the filter check and all rendering run only inside the
     elif branch where the screen name came from tweet['user']; tweets
     carrying a top-level 'screen_name' are never rendered.  This mirrors
     the sibling handler above -- confirm it is intentional.
     """
     a = analyze()
     header = """
     <html>
     <head>
     <title>Green Tweets</Title>
     <meta http-equiv="refresh" content="30" />
     </head>
     <body style="background:lightgray">
     """
     self.write(header)
     for tweet in tweetstore:
         screen_name = ""
         text = ""
         # skip tweets whose cleaned text is empty
         if a.cleanstr(tweet['text']) == '':
             #print "Skipping: " + tweet['text']
             pass
         else:
             if tweet.has_key('text'):
                 text = tweet['text'].encode('ascii', 'ignore')
                 text = convertLinks(text)
             if tweet.has_key('screen_name'):
                 screen_name = "@" + tweet['screen_name'].encode(
                     'ascii', 'ignore') + ": "
             elif tweet.has_key('user'):
                 if tweet['user'].has_key('screen_name'):
                     screen_name = "@" + tweet['user'][
                         'screen_name'].encode('ascii', 'ignore') + ": "
                 if a.finduser(screen_name) or (a.findword(text)) or (
                         a.findterm(text)):
                     # avatar, profile link, text and timestamp per tweet
                     img_url = ""
                     if tweet['user'].has_key('profile_image_url'):
                         img_url = tweet['user']['profile_image_url']
                     self.write(
                         "<p style='background:white; color: green'>")
                     if img_url != "":
                         self.write("<img src='" + img_url +
                                    "' style='float:left'>")
                     self.write("<a href='http://twitter.com/" +
                                screen_name[1:-2] + "' target='_blank'>" +
                                screen_name + "</a>")
                     self.write("<br/>")
                     self.write(text)
                     if tweet.has_key('created_at'):
                         self.write(
                             "<br/>" +
                             tweet['created_at'].encode('ascii', 'ignore'))
                     self.write("</p>")
     self.write("</body></html>")
Esempio n. 20
0
 def get(self):
     """Serve the landing page: a self-refreshing index of links to the
     other tweet views."""
     a = analyze()
     header = """
     <html>
     <head>
     <title>Twitter Client</Title>
     <meta http-equiv="refresh" content="60" />
     </head>
     <body style="background:lightgray">
     """
     self.write(header)
     # navigation links, written in the original order
     links = ("<a href='/green'>Green Tweets</A>",
              "<a href='/red'>Red Tweets</A>",
              "<a href='/stats'>Stats</A>",
              "<A href='/rt'>RTs</A>",
              "<A href='gjson'>Green Tweets JSON</A>")
     for link in links:
         self.write(link)
     self.write("</body></html>")
Esempio n. 21
0
    def get(self):
        """Dump every greenstore tweet dict verbatim, one per line, inside
        a bare HTML shell."""
        a = analyze()
        header = """
        <html>
        <head>
        <title>Green Tweets</Title>
        """
        finhdr = """
        </head>
        <body style="background:lightgray">
        """
        # emit the page skeleton, then the raw tweet dicts
        self.write(header)
        self.write(finhdr)
        for item in greenstore:
            self.write(item)
            self.write("\n")

        self.write("</body></html>")
Esempio n. 22
0
 def get(self):
     """Render tweetstore tweets with a color verdict: green when the user
     or content matches the analyzer's filters, red otherwise.

     NOTE(review): the coloring/writing block is nested inside the elif
     branch where the screen name came from tweet['user']; tweets with a
     top-level 'screen_name' are never written.  Confirm intentional.
     """
     a = analyze()
     header = """
     <html>
     <head>
     <title>Twitter Client</Title>
     <meta http-equiv="refresh" content="60" />
     </head>
     <body style="background:lightgray">
     """
     self.write(header)
     for tweet in tweetstore:
         screen_name = ""
         text = ""
         # skip tweets whose cleaned text is empty
         if a.cleanstr(tweet['text']) == '':
             #print "Skipping: " + tweet['text']
             pass
         else:
             if tweet.has_key('text'):
                 text = tweet['text'].encode('ascii', 'ignore')
                 text = convertLinks(text)
             if tweet.has_key('screen_name'):
                 screen_name = "@" + tweet['screen_name'].encode(
                     'ascii', 'ignore') + ": "
             elif tweet.has_key('user'):
                 if tweet['user'].has_key('screen_name'):
                     screen_name = "@" + tweet['user'][
                         'screen_name'].encode('ascii', 'ignore') + ": "
                 # preferred user -> green; matching word/term -> green with
                 # the matched word appended; anything else -> red
                 if a.finduser(screen_name):
                     self.write("<p style='color: green'>")
                 elif a.findword(text) or a.findterm(text):
                     self.write("<p style='color: green'>")
                     txt = a.appendword(text)
                     if txt != "":
                         text = txt
                 else:
                     self.write("<p style='color: red'>")
                 self.write(screen_name)
                 self.write(text)
                 self.write("</p>")
     self.write("</body></html>")
Esempio n. 23
0
 	catDir = cat[0]+'_nT'+cat[1]+'_nW'+cat[2]+'_nB'+cat[3]
 	datahists = {}
 	bkghists  = {}
 	sighists  = {}
 	if len(sys.argv)>1: outDir=sys.argv[1]
 	else: 
		outDir = os.getcwd()
		outDir+='/'+pfix
		if not os.path.exists(outDir): os.system('mkdir '+outDir)
		outDir+='/'+cutString
		if not os.path.exists(outDir): os.system('mkdir '+outDir)
		outDir+='/'+catDir
		if not os.path.exists(outDir): os.system('mkdir '+outDir)
 	category = {'isEM':cat[0],'nttag':cat[1],'nWtag':cat[2],'nbtag':cat[3]}
 	for data in dataList: 
 		datahists.update(analyze(tTreeData,data,cutList,isotrig,False,doJetRwt,iPlot,plotList[iPlot],category,region))
 		if catInd==nCats: del tFileData[data]
 	for bkg in bkgList: 
 		bkghists.update(analyze(tTreeBkg,bkg,cutList,isotrig,doAllSys,doJetRwt,iPlot,plotList[iPlot],category,region))
 		if catInd==nCats: del tFileBkg[bkg]
 		if doAllSys and catInd==nCats:
 			for syst in shapesFiles:
 				for ud in ['Up','Down']: del tFileBkg[bkg+syst+ud]
 	for sig in sigList: 
 		for decay in decays: 
 			sighists.update(analyze(tTreeSig,sig+decay,cutList,isotrig,doAllSys,doJetRwt,iPlot,plotList[iPlot],category,region))
 			if catInd==nCats: del tFileSig[sig+decay]
 			if doAllSys and catInd==nCats:
 				for syst in shapesFiles:
 					for ud in ['Up','Down']: del tFileSig[sig+decay+syst+ud]
 	if doQ2sys: 
def test_wordcount_analyze(_, __):
    """analyze() over the fixture corpus yields 327 distinct words with a
    known top-6 ordering."""
    counts = analyze(Context())
    assert len(counts) == 327
    expected_head = [('ut', 17), ('eu', 16), ('vel', 14),
                     ('nec', 14), ('quis', 12), ('vitae', 12)]
    assert counts[:6] == expected_head
Esempio n. 25
0
# Ad-hoc exploration script: analyze the first pcap pair and unpack results.
from analyze import *
import summary as sm

pcaps = all_pcaps()
local, remote, dir = next(pcaps)  # NOTE: 'dir' shadows the builtin

results = analyze(local, remote, dir)
local_sender = results[0]
local_sender_steady = results[1]
remote_sender = results[2]
remote_sender_steady = results[3]

# NOTE(review): results[0] is unpacked again here as a 6-tuple -- notebook
# style; confirm the element's shape before reusing this outside a REPL.
throughput_quantiles, rtt_quantiles, host, protocol, start_time, _ = results[0]
rtt_quantiles  # bare expression: only displays a value in a REPL/notebook
# Cookiecutter template: the {{cookiecutter.*}} placeholders are substituted
# at project-generation time; this file is not valid Python until rendered.
from mock import patch
from pysparkling import Context
from jobs.{{cookiecutter.job}} import analyze

@patch('jobs.{{cookiecutter.job}}.{{ cookiecutter.project.replace(' ', '').replace('-', '') }}Context.initalize_counter')
@patch('jobs.{{cookiecutter.job}}.{{ cookiecutter.project.replace(' ', '').replace('-', '') }}Context.inc_counter')
def test_{{cookiecutter.job}}_analyze(_, __):
    # counters are mocked out; only the distinct word count is asserted
    result = analyze(Context())
    assert len(result) == 327
Esempio n. 27
0
        outDir += '/' + pfix
        if not os.path.exists(outDir): os.system('mkdir ' + outDir)
        #outDir+='/'+cutString
        if not os.path.exists(outDir): os.system('mkdir ' + outDir)
        outDir += '/' + catDir
        if not os.path.exists(outDir): os.system('mkdir ' + outDir)
    category = {
        'isEM': cat[0],
        'nttag': cat[1],
        'nWtag': cat[2],
        'nbtag': cat[3],
        'njets': cat[4]
    }
    for data in dataList:
        datahists.update(
            analyze(tTreeData, data, cutList, False, iPlot, plotList[iPlot],
                    category))
        if catInd == nCats: del tFileData[data]
    pickle.dump(datahists, open(outDir + '/datahists_' + iPlot + '.p', 'wb'))
    catInd += 1

catInd = 1
for cat in catList:
    if not runBkgs: break
    catDir = cat[0] + '_nT' + cat[1] + '_nW' + cat[2] + '_nB' + cat[
        3] + '_nJ' + cat[4]
    catDir = catDir.replace('_nT0p',
                            '').replace('_nW0p',
                                        '').replace('_nB0p',
                                                    '').replace('_nJ0p', '')
    bkghists = {}
    if len(sys.argv) > 1: outDir = sys.argv[1]
Esempio n. 28
0
        if not os.path.exists(outDir): os.system('mkdir ' + outDir)
        outDir += '/' + catDir
        if not os.path.exists(outDir): os.system('mkdir ' + outDir)
    category = {
        'isEM': cat[0],
        'nttag': cat[1],
        'nWtag': cat[2],
        'nbtag': cat[3],
        'njets': cat[4]
    }
    for data in dataList:
        print "*****" * 20
        print "*****" * 20
        print "[data] : ", category, region, isCategorized
        datahists.update(
            analyze(tTreeData, data, data, cutList, False, doJetRwt, iPlot,
                    plotList[iPlot], category, region, isCategorized))
        if catInd == nCats: del tFileData[data]
    pickle.dump(datahists, open(outDir + '/datahists_' + iPlot + '.p', 'wb'))
    catInd += 1

catInd = 1
for cat in catList:
    if not runBkgs: break
    if skip(cat[4], cat[3]) and isCategorized:
        continue  #DO YOU WANT TO HAVE THIS??
    catDir = cat[0] + '_nT' + cat[1] + '_nW' + cat[2] + '_nB' + cat[
        3] + '_nJ' + cat[4]
    bkghists = {}
    if len(sys.argv) > 1: outDir = sys.argv[1]
    else:
        outDir = os.getcwd()
Esempio n. 29
0
	catDir = cat[0]+'_nT'+cat[1]+'_nW'+cat[2]+'_nB'+cat[3]
	datahists = {}
	bkghists  = {}
	sighists  = {}
	if len(sys.argv)>1: outDir=sys.argv[1]
	else: 
		outDir = os.getcwd()+'/'
		outDir+=pfix
		if not os.path.exists(outDir): os.system('mkdir '+outDir)
		if not os.path.exists(outDir+'/'+cutString): os.system('mkdir '+outDir+'/'+cutString)
		outDir+='/'+cutString
		if not os.path.exists(outDir+'/'+catDir): os.system('mkdir '+outDir+'/'+catDir)
		outDir+='/'+catDir
	category = {'isEM':cat[0],'nttag':cat[1],'nWtag':cat[2],'nbtag':cat[3]}
	for data in dataList: 
		datahists.update(analyze(tTreeData,data,cutList,False,iPlot,plotList[iPlot],category))
		if catInd==nCats: del tFileData[data]
	for bkg in bkgList: 
		bkghists.update(analyze(tTreeBkg,bkg,cutList,doAllSys,iPlot,plotList[iPlot],category))
		if catInd==nCats: del tFileBkg[bkg]
		if doAllSys and catInd==nCats:
			for syst in shapesFiles:
				if 'DataDriven' in bkg: continue
				for ud in ['Up','Down']: del tFileBkg[bkg+syst+ud]
	for sig in sigList: 
		for decay in decays: 
			sighists.update(analyze(tTreeSig,sig+decay,cutList,doAllSys,iPlot,plotList[iPlot],category))
			if catInd==nCats: del tFileSig[sig+decay]
			if doAllSys and catInd==nCats:
				for syst in shapesFiles:
					if 'DataDriven' in bkg: continue			
Esempio n. 30
0
import json
from analyze import *
from load_data import load_data

if __name__ == '__main__':
  # read the USDA API key from the local keys file
  with open('keys.json') as f:
    key = json.load(f)['usda-api-key']
  # data loading is a one-off step; re-enable when the cache is stale
  #load_data(key)
  analyze()
Esempio n. 31
0
     outDir = os.getcwd()
     outDir += '/' + pfix
     if not os.path.exists(outDir): os.system('mkdir ' + outDir)
     outDir += '/' + cutString
     if not os.path.exists(outDir): os.system('mkdir ' + outDir)
     outDir += '/' + catDir
     if not os.path.exists(outDir): os.system('mkdir ' + outDir)
 category = {
     'isEM': cat[0],
     'nttag': cat[1],
     'nWtag': cat[2],
     'nbtag': cat[3]
 }
 for data in dataList:
     datahists.update(
         analyze(tTreeData, data, cutList, isotrig, False, doJetRwt, iPlot,
                 plotList[iPlot], category, region))
     if catInd == nCats: del tFileData[data]
 for bkg in bkgList:
     bkghists.update(
         analyze(tTreeBkg, bkg, cutList, isotrig, doAllSys, doJetRwt, iPlot,
                 plotList[iPlot], category, region))
     if catInd == nCats: del tFileBkg[bkg]
     if doAllSys and catInd == nCats:
         for syst in shapesFiles:
             for ud in ['Up', 'Down']:
                 del tFileBkg[bkg + syst + ud]
 for sig in sigList:
     for decay in decays:
         sighists.update(
             analyze(tTreeSig, sig + decay, cutList, isotrig, doAllSys,
                     doJetRwt, iPlot, plotList[iPlot], category, region))
Esempio n. 32
0
    if len(syslist) > 1:
        print('Systems:')
        for sysname in sorted(syslist):
            print("    " + sysname)

        print()

    # analyze the systems:
    if len(syslist) == 1 and syslist[0] == '':
        print('no system?')
        usage()
        sys.exit(1)

    switches = (show_events, show_disk, show_details, show_filesystem, show_mem, show_ping)
    for sysname in sorted(syslist):
        analyze(sysname, allSystems, switches)

    print()

##------------------------------------------------------------------------------
## pretty-print the resulting dictionary:
#print('allSystems:')
#print()
#pp.pprint(allSystems)
#print()

# ----------------------------------------------------------------------------
#
# ----------------------------------------------------------------------------

# EOF:
Esempio n. 33
0
                      outfile = outfile)

    return datafiles
    

if __name__ == '__main__':    

    parser = argparse.ArgumentParser(description="Run the benchmark tests")
    parser.add_argument('--engines', type=str, nargs='+', default=['ode','pqp','fcl'],
                        help="The collision checkers to test")
    args = parser.parse_args()

    # Self collision
    self_collision_data = run_self_collision(args.engines)
    out_basename = os.path.join(package_path, 'results', 'self_collision')
    analyze(self_collision_data, title='Self Collision', out_basename=out_basename)
    
    # Empty envirnment
    empty_collision_data = run_environment_collision(args.engines)
    out_basename = os.path.join(package_path, 'results', 'empty_env_collision')
    analyze(empty_collision_data, title='Empty Environment Collisions', out_basename=out_basename)

    # PR kitchen
    from catkin.find_in_workspaces import find_in_workspaces
    kitchen_env = find_in_workspaces(
        search_dirs=['share'],
        project='pr_ordata',
        path='data/kitchen/pr_kitchen.env.xml',
        first_match_only=True)[0]
        
    kitchen_collision_data = run_environment_collision(args.engines, kitchen_env, 'kitchen')
import timefrom gensim.models.word2vec import Word2Vecfrom Utils.string_utils import clean_strfrom Utils.file_utils import find_filesfrom analysis_pipeline import analyze, debug_analyzefrom analysis_pipeline import build_synonym_filter, fact_case_sensitive_stop_word_filter, fact_stop_word_filterfrom analysis_pipeline import fact_is_synonym_filter, white_space_tokenize, remove_punct_at_end_filter, lower_case_filter, remove_empty_tokens_filterfrom Config.train_word2vec_model_config import TrainWord2VecModelConfigimport sys
""" TRAIN Word 2 Vec Model"""
if len(sys.argv) != 2: raise Exception("Incorrect number of arguments passed - one expected, the config file name")
config = TrainWord2VecModelConfig(sys.argv[1])
""" Load analysis chain """syn_mapper = build_synonym_filter(config.keywords_files, config.case_sensitive)
if config.case_sensitive:    stop_filter = fact_case_sensitive_stop_word_filter(config.stop_words_file)else:    stop_filter = fact_stop_word_filter(config.stop_words_file)
# Simon Hughes: This is quite inefficient, as each function is applied in turn# resulting in multiple passes over the token stream. While not currently a# big performance bottleneck, could be much faster.#  - TODO: use functional composition to speed upis_a_synonym_filter = fact_is_synonym_filter(syn_mapper)analysis_chain = [clean_str,                  white_space_tokenize,                  remove_punct_at_end_filter,                  lower_case_filter,                  stop_filter,                  syn_mapper.map_synonyms,                  remove_empty_tokens_filter] # is_a_synonym_filter] - Un-comment to just train on keywords.
#Test#rslt = debug_analyze("$150k as400 Sr.\ Java/j2ee and the C#.! developer. FIT \"HOT\" dev. -IBM's business, sql server management", analysis_chain)
""" Load Documents """start = time.time()
sentences = []files = find_files(config.processed_documents_folder, config.file_mask, True)print("%s files found in %s" % (len(files), config.processed_documents_folder))
documents = []for i, fname in enumerate(files): with open(fname) as f:        contents = f.read()        sentences.extend(contents.split("\n"))end = time.time()print("Loading %i sentences took %s seconds" % (len(sentences), str(end - start)))
""" Analyze - clean, tokenize, extract phrases """print("%i sentences to process" % len(sentences))
tokenized = []print("Tokenizing sentences")for i, sent in enumerate(sentences):    tokens = analyze(sent, analysis_chain) if len(tokens) >= config.min_sentence_length_words:        tokenized.append(tokens) if i % 100000 == 0: print(i)
""" Train Model """
start = time.time()
print("Training Model. This could take a while (10-60 mins for moderate collections). Get a coffee")model = Word2Vec(tokenized, iter=config.training_iterations, size=config.vector_size, window=config.window_size, min_count=config.min_word_count, workers=config.workers, sample=1e-5, hs=0, negative=20)model.save(config.model_file)end = time.time()print "Took %s seconds" % (end - start)
Esempio n. 35
0
    print()

    # display list of systems we found:
    syslist = list(allSystems)
    if len(syslist) > 1:
        print('Systems:')
        for sysname in sorted(syslist):
            print("    " + sysname)

        print()

    # analyze the systems:
    if len(syslist) > 0:
        for sysname in sorted(syslist):
            analyze(sysname, allSystems)

        print()

    # close the output file if we had one:
    if lclvars.outfile != None:
        lclvars.outfile.close()

##------------------------------------------------------------------------------
## pretty-print the resulting dictionary:
#print('allSystems:')
#print()
#pp.pprint(allSystems)
#print()

# ----------------------------------------------------------------------------