def makePatch(oldfl, newfl, patchdir, ignorebin=False):
    cmd = ['diff', '-Nu', oldfl, newfl]
    if not ignorebin:
        cmd.append('-a')
    # No patches for symbolic links.
    if os.path.islink(newfl):
        return False
    assert newfl.startswith(cfg.DEVDIR + '/')
    patchpath = newfl[len(cfg.DEVDIR)+1:]
    outfile = '_'.join(patchpath.split('/')) + ".patch"
    outpath = os.path.join(patchdir, outfile)
    proc = subprocess.Popen(cmd, stdout=PIPE)
    diffout = proc.communicate()[0]
    if MAJOR >= 3:
        enc = sys.stdout.encoding
        if enc is None:
            enc = 'utf-8'
        diffout = diffout.decode(enc)
    if ignorebin and diffout.startswith('Binary files '):
        return False
    out("Saving patch for %s to %s" % (newfl, outpath))
    outfl = open(outpath, 'w')
    outfl.write(diffout)
    outfl.write("\n")
    outfl.flush()
    outfl.close()
    return True
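# For reference, the core diff invocation in makePatch is equivalent to this
# standalone sketch (paths are hypothetical; assumes GNU diff is on PATH):
# -N treats absent files as empty, -a treats all files as text.
import subprocess

proc = subprocess.Popen(['diff', '-Nu', '-a', 'old/app.js', 'new/app.js'],
                        stdout=subprocess.PIPE)
diff_text = proc.communicate()[0].decode('utf-8')  # unified diff as text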
def add_profexec(self, group_id):
    """Insert a record of profexec table.
    @param self
    @param group_id id of profgroup table
    @return the id of profexec table
    """
    exec_time = self.infodic["exec_time"]
    start_ts = self.infodic["start_ts"]
    i_dic = {
        "profgroup_id": group_id,
        "exec_time": exec_time,
        "start_ts": start_ts}
    sql_s = """SELECT id FROM profexec
               WHERE profgroup_id = ? AND start_ts = ?;"""
    # large integer has suffix "L", which should be removed by str()
    rtup = self.conn.select(sql_s, (group_id, str(start_ts)))
    if len(rtup) == 0:
        if self.options.verbose >= 1:
            util.out("No such profexec. will newly insert...")
        rdic = self.conn.insert("profexec", i_dic)
        exec_id = rdic["id"]
        if self.options.verbose >= 2:
            util.out("new exec id", exec_id)
        return exec_id
    else:
        raise Exception("Same Profexec exists, aborting")
def __setup_savedir(self, basedir, savedir):
    out('Setting up session directory "%s"' % savedir)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    f = open(os.path.join(basedir, self.blacklistfile), "w")
    f.close()
    return True
def _string_ids(self, df, g_id, regex=r'(#\w+)', data_dir=None):
    fp = ''
    if data_dir is not None:
        hash_path = data_dir + 'hashtag_sim.csv'
        ment_path = data_dir + 'mention_sim.csv'
        link_path = data_dir + 'link_sim.csv'
        if regex == r'(#\w+)':
            fp = hash_path
        elif regex == r'(@\w+)':
            fp = ment_path
        elif regex == r'(http[^\s]+)':
            fp = link_path
    if data_dir is not None and os.path.exists(fp):
        ut.out('reading sim file...', 0)
        r_df = pd.read_csv(fp)
        r_df = r_df[r_df['com_id'].isin(df['com_id'])]
        g_df = r_df.groupby(g_id).size().reset_index()
        g_df = g_df[g_df[0] > 1]
        r_df = r_df[r_df[g_id].isin(g_df[g_id])]
    else:
        group = g_id.replace('_id', '')
        regex = re.compile(regex)
        inrel = []
        for _, row in df.iterrows():
            s = self._get_items(row.text, regex)
            inrel.append({'com_id': row.com_id, group: s})
        inrel_df = pd.DataFrame(inrel).drop_duplicates()
        inrel_df = inrel_df[inrel_df[group] != '']
        r_df = self._cols_to_ids(inrel_df, g_id=g_id, cols=[group])
    return r_df
def __create_win(self, keep_numbering, wins_trans, pid, hostgroup, rootgroup,
                 win, time, group, type, title, filter, scrollback_len,
                 processes):
    if keep_numbering:
        winarg = win
    else:
        winarg = ""
    if type[0] == 'b':
        if win in self.force_start:
            primer_arg = self.primer_arg + 'S'
        else:
            primer_arg = self.primer_arg
        if win in self.scroll or not self._scrollfile or not \
                os.path.exists(os.path.join(self.homedir, self.projectsdir,
                                            self._scrollfile + win)):
            scrollfile = '0'
        else:
            scrollfile = self._scrollfile + win
        self.screen('-h %s -t "%s" %s %s %s %s %s %s' %
                    (scrollback_len, title, winarg, self.primer, primer_arg,
                     self.projectsdir, scrollfile,
                     os.path.join(self.savedir, "win_" + win)))
    elif type[0] == 'g':
        self.screen('-t "%s" %s //group' % (title, winarg))
    else:
        out('%s Unknown window type "%s". Ignoring.' % (win, type))
        return -1
    newwin = self.number()
    return newwin
def __move_all_windows(self, shift, group, kill=False):
    homewindow = int(self.homewindow)
    # create wrap group for existing windows
    if not self.bNoGroupWrap:
        self.screen('-t "%s" //group' % ('%s_%s' % (group, self.__unique_ident)))
        self.group(False, self.none_group)
        self.wrap_group_id = self.number()
    # move windows by shift and put them in a wrap group
    # for cwin, cgroupid, ctype, ctty in sc.gen_all_windows_fast(self.pid):
    for (cwin, cgroupid, cgroup, ctty, ctype, ctypestr, ctitle, cfilter,
         cscroll, ctime) in sc.gen_all_windows_full(self.pid):
        iwin = int(cwin)
        if iwin == homewindow:
            homewindow = iwin + shift
            self.homewindow = str(homewindow)
        cgroupid, cgroup = self.get_group(cwin)
        if not self.bNoGroupWrap and cgroup == self.none_group:
            self.select(self.wrap_group_id)
            self.group(False, group, str(cwin))
        command = '%s -p %s -X number +%d' % (self.sc, cwin, shift)
        if not self.bNoGroupWrap and str(cwin) == str(self.wrap_group_id):
            out('Moving wrap group %s to %d' % (cwin, iwin + shift))
            self.wrap_group_id = str(iwin + shift)
        else:
            out('Moving window %s to %d' % (cwin, iwin + shift))
        os.system(command)
    self.select('%d' % homewindow)
def __rollback(self, cmdline):
    try:
        cmdline = cmdline.split('\0')
        if cmdline[3] == '0':
            requireme(self.homedir, cmdline[2], cmdline[4])
        else:
            requireme(self.homedir, cmdline[2], cmdline[3])
        path = os.path.join(self.homedir, cmdline[2], cmdline[4])
        fhead, ftail = os.path.split(cmdline[4])
        target = os.path.join(self.homedir, self.projectsdir, self.savedir,
                              ftail + '__rollback')
        number = ftail.split('_')[1]
        oldsavedir = fhead
        # import win_* files from previous savefiles
        try:
            shutil.move(os.path.join(self.homedir, cmdline[2], cmdline[4]),
                        target)
        except Exception as e:
            out(str(e))
            target = None
        # import hardcopy.* files from previous savefiles
        fhead, ftail = os.path.split(cmdline[3])
        target2 = os.path.join(self.homedir, self.projectsdir, self.savedir,
                               ftail + '__rollback')
        try:
            shutil.move(os.path.join(self.homedir, cmdline[2], cmdline[3]),
                        target2)
        except Exception as e:
            # out(str(e))
            target2 = None
def find_subgraphs(self, df, relations, max_size=40000, verbose=False):
    if verbose:
        t1 = ut.out('finding subgraphs...')
    if verbose:
        t1 = ut.out('building networkx graph...')
    g = self.build_networkx_graph(df, relations)
    ccs = list(nx.connected_components(g))
    if verbose:
        ut.time(t1)
    if verbose:
        t1 = ut.out('processing connected components...')
    subgraphs = self._process_components(ccs, g)
    if verbose:
        ut.time(t1)
    # t1 = ut.out('filtering redundant subgraphs...')
    # subgraphs = self._filter_redundant_subgraphs(subgraphs, df)
    # ut.time(t1)
    # t1 = ut.out('removing single edge hubs...')
    # subgraphs = self._remove_single_edge_hubs(subgraphs, g)
    # ut.time(t1)
    # t1 = ut.out('compiling single node subgraphs...')
    # subgraphs += self._single_node_subgraphs(subgraphs, df, max_size)
    # ut.time(t1)
    if verbose:
        self._print_subgraphs_size(subgraphs)
    return g, subgraphs
def run_benchmarks(debug=False, overwrite=False, refine=None, synonly=False,
                   service=False, apps=None):
    results = RunResults('benchmarks', overwrite)
    if refine is None:
        refine = 0
    cases = load_app_sources(BENCHMARK_DIR, defwarn=True, apps=apps)
    apps = list(cases.keys())
    apps.sort()
    for appname in apps:
        inps = cases[appname]
        srcfls = inps[0]
        poldict = inps[1]
        seeds = inps[2]
        apppath = inps[3]
        opts = inps[4]
        opts.append('-N')
        opts.append(appname)
        if synonly:
            opts.append('-z')
        if appname in LARGE_BENCHMARKS:
            # Forgo interprocedural analysis for these benchmarks.
            opts.append('-P')
        # Run with each policy file separately.
        if MAJOR >= 3:
            politems = poldict.items()
        else:
            politems = poldict.iteritems()
        for poldesc, polfiles in politems:
            result = RunResult(False, False)
            results.add(result)
            out('Analyzing %s' % appname)
            if service:
                outp, errp = query_jam_service(srcfls, polfiles, refine=refine,
                                               seeds=seeds, moreopts=opts)
            else:
                outp, errp = run_jam(srcfls, polfiles, refine=refine,
                                     debug=debug, seeds=seeds, moreopts=opts)
            # Error case, message printed in |run_jam|.
            if outp is None:
                continue
            refsuf = get_suffix(synonly, refine, poldesc)
            expfile = '%s.%s.out.js' % (appname, refsuf)
            exppath = os.path.join(apppath, expfile)
            result.js_ok = process_result(outp, exppath, overwrite)
            infopath = get_info_path(errp)
            if infopath is None:
                err('Could not determine info path: %s\n' % appname)
                err('ERRP: %s' % errp)
                continue
            infoexpfile = '%s.%s.info.txt' % (appname, refsuf)
            infoexppath = os.path.join(apppath, infoexpfile)
            result.info_ok = process_info(infopath, infoexppath, overwrite)
    sys.stderr.write('\n')
    results.printSummary()
def run_tx_tests(case=None, debug=False, jscmd=JS_COMMAND, moreopts=[]):
    tot = 0
    tot_ok = 0
    start = time.time()
    testcases = load_testcases(JAMSCRIPT_TESTDIR, None, filter=case)
    for inps in testcases:
        tot += 1
        jspath = inps[0]
        policies = inps[1]
        outp = run_tx(jspath, policies, perf=debug, debug=debug, jscmd=jscmd)
        exppath = os.path.splitext(jspath)[0] + '.exp'
        stat = validate_output(outp, exppath)
        if stat == 'match':
            tot_ok += 1
        jsname = os.path.basename(jspath)
        if debug:
            sys.stdout.write(outp)
        if debug or stat != 'match':
            out('%s %s' % (jsname, stat))
    end = time.time()
    tottime = end - start
    vals = (tot_ok, tot, tottime)
    out('%d of %d transaction tests successful; %.2fs\n' % vals)
def load(self):
    if 'all' in self.force_start:
        self.primer_arg += 'S'
        self.force_start = []
    if 'all' in self.scroll:
        self._scrollfile = None
    out('session "%s" loading "%s"' %
        (self.pid, os.path.join(self.basedir, self.savedir)))
    # check if the saved session exists and get the biggest saved window
    # number and a number of saved windows
    maxnewwindow = 0
    newwindows = 0
    try:
        winlist = list(glob.glob(os.path.join(self.basedir, self.savedir,
                                              'win_*')))
        newwindows = len(winlist)
        out('%d new windows' % newwindows)
    except Exception:
        sys.stderr.write('Unable to open winlist.\n')
        return 1
    # keep original numbering, move existing windows
    self.homewindow = self.number()
    if self.exact:
        maxnewwindow = -1
        for w in winlist:
            try:
                w = int(w.rsplit("_", 1)[1])
                if w > maxnewwindow:
                    maxnewwindow = w
            except:
                pass
        out('Biggest new window number: %d' % maxnewwindow)
    if self.enable_layout:
        self.__remove_all_layouts()
    self.__move_all_windows(maxnewwindow + 1, self.group_other, False)
    out("\nLoading windows:")
    self.__load_screen()
    if self.enable_layout:
        out("\nLoading layouts:")
        try:
            self.__load_layouts()
        except:
            sys.stderr.write('Layouts loading failed!\n')
            # raise
    self.__restore_mru()
    sc.cleanup()
    return 0
def _print_subgraphs_size(self, subgraphs):
    tot_m, tot_h, tot_e = 0, 0, 0
    for ids, hubs, rels, edges in subgraphs:
        tot_m += len(ids)
        tot_h += len(hubs)
        tot_e += edges
    t = (len(subgraphs), tot_m, tot_h, tot_e)
    ut.out('subgraphs: %d, msgs: %d, hubs: %d, edges: %d' % t)
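# Each subgraph above is a (msg_ids, hub_ids, relations, edge_count) tuple;
# a standalone toy illustration of the totals this method reports:
subgraphs = [({1, 2, 3}, set(), {'text_id'}, 2),
             ({4, 5}, {9}, {'hashtag_id'}, 1)]
tot_m = sum(len(ids) for ids, _, _, _ in subgraphs)    # 5 messages
tot_h = sum(len(hubs) for _, hubs, _, _ in subgraphs)  # 1 hub
tot_e = sum(edges for _, _, _, edges in subgraphs)     # 3 edges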
def __load_screen(self):
    homewindow = self.homewindow
    # out("Homewindow is " + homewindow)
    # check if target Screen is currently in some group and set hostgroup to it
    (hostgroupid, hostgroup) = self.get_group(homewindow)
    rootwindow = self.number()
    if self.exact:
        rootgroup = self.none_group
        hostgroup = self.none_group
    elif self.bNoGroupWrap:
        rootgroup = self.none_group
    else:
        # create a root group and put it into host group
        rootgroup = "RESTORE_" + self.savedir
        rootwindow = self.screen('-t "%s" %s //group' % (rootgroup, 0))
        self.group(False, hostgroup, rootwindow)
    out("restoring Screen session inside window %s (%s)" %
        (rootwindow, rootgroup))
    self.command_at(True, 'setenv SCREENSESSION %s' %
                    os.path.join(self.basedir, self.savedir))
    wins = []
    for id in range(0, int(self.MAXWIN_REAL)):
        try:
            filename = os.path.join(self.basedir, self.savedir,
                                    "win_" + str(id))
            if os.path.exists(filename):
                f = open(filename)
                win = list(f)[0:9]
                f.close()
                win = [x.strip() for x in win]
                try:
                    nproc = win[8]
                except:
                    nproc = '0'
                wins.append((win[0], win[1], win[2], win[3],
                             self.__escape_bad_chars(win[4]),
                             win[5], win[6], win[7], nproc))
        except Exception as x:
            sys.stderr.write('%d Unable to load window ( %s )\n' % (id, str(x)))
def gen_relational_ids(self, df, relations, data_dir=None, exact=True):
    """Generates relational ids for a given dataframe."""
    df = df.copy()
    for relation, group, group_id in relations:
        ut.out(relation + '...')
        if exact:
            df = self._gen_group_id(df, group_id)
        else:
            df = self._gen_group_id_lists(df, group_id, data_dir=data_dir)
    return df
def load(self): if "all" in self.force_start: self.primer_arg += "S" self.force_start = [] if "all" in self.scroll: self._scrollfile = None out('session "%s" loading "%s"' % (self.pid, os.path.join(self.basedir, self.savedir))) # check if the saved session exists and get the biggest saved window number and a number of saved windows maxnewwindow = 0 newwindows = 0 try: winlist = list(glob.glob(os.path.join(self.basedir, self.savedir, "win_*"))) newwindows = len(winlist) out("%d new windows" % newwindows) except Exception: sys.stderr.write("Unable to open winlist.\n") return 1 # keep original numbering, move existing windows self.homewindow = self.number() if self.exact: maxnewwindow = -1 for w in winlist: try: w = int(w.rsplit("_", 1)[1]) if w > maxnewwindow: maxnewwindow = w except: pass out("Biggest new window number: %d" % maxnewwindow) if self.enable_layout: self.__remove_all_layouts() self.__move_all_windows(maxnewwindow + 1, self.group_other, False) out("\nLoading windows:") self.__load_screen() if self.enable_layout: out("\nLoading layouts:") try: self.__load_layouts() except: sys.stderr.write("Layouts loading failed!\n") # raise self.__restore_mru() sc.cleanup() return 0
def retrieve_chunk(df, max_size=5000000, chunk_number=0):
    if chunk_number == -1:
        return df
    for i in range(2, 50):
        ut.out('splitting into %d chunks...' % i)
        dfs = np.array_split(df, i)
        if len(dfs[0]) <= max_size:
            ut.out('return chunk %d...' % chunk_number)
            return dfs[chunk_number]
    return df
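# Hypothetical usage sketch (assumes the module's `ut` logger import, as used
# above): with 10 rows and max_size=3, np.array_split yields chunk sizes
# [3, 3, 2, 2], so chunk_number=1 returns rows 3-5.
import numpy as np
import pandas as pd

example_df = pd.DataFrame({'com_id': range(10)})
chunk = retrieve_chunk(example_df, max_size=3, chunk_number=1)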
def process_result(outp, exppath, overwrite):
    ok = False
    if overwrite:
        stat = overwrite_expected(outp, exppath)
        if stat == 'overwritten' or stat == 'created':
            ok = True
    else:
        stat = validate_output(outp, exppath)
        if stat == 'match':
            ok = True
    expname = os.path.basename(exppath)
    out('%s %s' % (expname, stat))
    return ok
def run(self, usernames, vote_urls):
    for vote_url in vote_urls:
        out(f"Voting for the url {vote_url}")
        for username in usernames:
            driver = self.init_driver()
            self.install_ext(driver)
            while True:
                try:
                    self.vote(driver, username, vote_url)
                    break
                except UnexpectedAlertPresentException:  # Captcha Error
                    out(f"Retrying to vote for {username}")
                    continue
def _text_ids(self, df, g_id, data_dir=None):
    fp = None if data_dir is None else data_dir + 'text_sim.csv'
    if data_dir is not None and os.path.exists(fp):
        ut.out('reading sim file...', 0)
        r_df = pd.read_csv(fp)
        r_df = r_df[r_df['com_id'].isin(df['com_id'])]
        g_df = r_df.groupby(g_id).size().reset_index()
        g_df = g_df[g_df[0] > 1]
        r_df = r_df[r_df[g_id].isin(g_df[g_id])]
    else:
        df = df[df['text'] != '']
        r_df = self._cols_to_ids(df, g_id=g_id, cols=['text'])
    return r_df
def consolidate(self, subgraphs, max_size=40000, div=2):
    """Combine subgraphs into larger sets to reduce the total number of
    subgraphs to do inference over."""
    t1 = ut.out('consolidating subgraphs...')
    sgs = []
    new_ids, new_hubs = set(), set()
    new_rels, new_edges = set(), 0
    for ids, hubs, rels, edges in subgraphs:
        size = int(len(new_ids) / div) + int(len(ids) / div)
        size += new_edges + edges
        if size < max_size:  # keep adding to new
            new_ids.update(ids)
            new_rels.update(rels)
            new_hubs.update(hubs)
            new_edges += edges
        elif new_edges == 0 and size > max_size:  # subgraph too big
            new_ids.update(ids)
            new_hubs.update(hubs)
            new_rels.update(rels)
            new_edges += edges
        else:  # new is full
            sgs.append((new_ids, new_hubs, new_rels, new_edges))
            new_ids, new_hubs = ids, hubs
            new_rels, new_edges = rels, edges
    if len(new_ids) > 0:
        sgs.append((new_ids, new_hubs, new_rels, new_edges))
    ut.time(t1)
    self._print_subgraphs_size(sgs)
    return sgs
def __scrollback_clean(self):
    '''clean up scrollback files: remove empty lines at the beginning
    and at the end of a file'''
    for f in glob.glob(os.path.join(self.basedir, self.savedir, 'hardcopy.*')):
        try:
            ftmp = f + "_tmp"
            temp = open(ftmp, 'w')
            thefile = open(f, 'r')
            beginning = True
            for line in thefile:
                if beginning:
                    if cmp(line, '\n') == 0:
                        line = line.replace('\n', '')
                    else:
                        beginning = False
                temp.write(line)
            temp.close()
            thefile.close()
            temp = open(ftmp, 'r')
            endmark = -1
            lockmark = False
            for i, line in enumerate(temp):
                if cmp(line, '\n') == 0:
                    if not lockmark:
                        endmark = i
                        lockmark = True
                else:
                    endmark = -1
                    lockmark = False
            temp.close()
            if endmark > 1:
                thefile = open(f, 'w')
                temp = open(ftmp, 'r')
                for i, line in enumerate(temp):
                    if i == endmark:
                        break
                    else:
                        thefile.write(line)
                thefile.close()
                temp.close()
                util.remove(ftmp)
            else:
                util.remove(f)
                os.rename(ftmp, f)
        except:
            out('Unable to clean scrollback file: ' + f)
def printSummary(self):
    self.end = time.time()
    tottime = self.end - self.start
    tot = 0
    js_tot = 0
    js_ok = 0
    info_tot = 0
    info_ok = 0
    html_tot = 0
    html_ok = 0
    for res in self.results:
        assert isinstance(res, RunResult)
        tot += 1
        if res.js_ok is not None:
            js_tot += 1
            if res.js_ok:
                js_ok += 1
        if res.info_ok is not None:
            info_tot += 1
            if res.info_ok:
                info_ok += 1
        if res.html_ok is not None:
            html_tot += 1
            if res.html_ok:
                html_ok += 1
    if self.overwrite:
        action = 'overwrote'
    else:
        action = 'verified'
    txts = []
    if js_tot > 0:
        jstxt = '%d/%d JS output' % (js_ok, js_tot)
        txts.append(jstxt)
    if info_tot > 0:
        infotxt = '%d/%d info files' % (info_ok, info_tot)
        txts.append(infotxt)
    if html_tot > 0:
        htmltxt = '%d/%d HTML output' % (html_ok, html_tot)
        txts.append(htmltxt)
    if len(txts) > 0:
        restxt = '%s %s' % (action, ', '.join(txts))
    else:
        restxt = 'no results'
    out('%s for %d %s; %.2fs\n' % (restxt, tot, self.desc, tottime))
def plot_distributions(df, feats=[]):
    dfs = df[df['label'] == 1]
    dfh = df[df['label'] == 0]
    for feat in feats:
        ut.out('plotting distribution for: %s\n' % feat, 0)
        f, ax = plt.subplots(1, 1)
        ns, bs, ps = ax.hist(dfs[feat], normed=1, color='r', alpha=0.69)
        nh, bh, ph = ax.hist(dfh[feat], normed=1, color='b', alpha=0.69)
        ms, ss = norm.fit(dfs[feat])
        mh, sh = norm.fit(dfh[feat])
        ys = mlb.normpdf(bs, ms, ss)
        yh = mlb.normpdf(bh, mh, sh)
        ax.plot(bs, ys, 'r--')
        ax.plot(bh, yh, 'b--')
        f.savefig(feat + '.pdf', format='pdf', bbox_inches='tight')
        plt.clf()
def save(self):
    self.homewindow, title = self.get_number_and_title()
    out("\nCreating directories:")
    if not self.__setup_savedir(self.basedir, self.savedir):
        return 1
    if self.enable_layout:
        out("\nSaving layouts:")
        self.homewindow_last, title = self.get_number_and_title()
        self.__save_layouts()
        out("")
    out("\nSaving windows:")
    self.__save_screen()
    out("\nCleaning up:")
    self.__scrollback_clean()
    return 0
def prepare_registration(self):
    """Prepare data for the profile log registration to the database.
    @param self
    """
    d = dict()
    d["profs"] = self.profs
    d["node_set"] = self.nodeset
    d["nodes"] = len(d["node_set"])
    d["nproc"] = len(self.profs)
    # XML metadata dictionary from main profile log
    lcands = [p for p in self.profs if p.filename.endswith("profile.0.0.0")]
    assert(len(lcands) == 1)
    d["main_loader"] = lcands[0]
    d["soupdic"] = util.soup2dic(d["main_loader"].soup)
    # Place of execution
    d["place"] = util.getplacename(self.options, d)
    # Execution time
    d["start_ts"] = int(d["soupdic"]["Starting Timestamp"])
    t0 = int(d["soupdic"]["Starting Timestamp"])
    t1 = int(d["soupdic"]["Timestamp"])
    d["exec_time"] = (t1 - t0) / 1e6
    # library
    d["library"] = util.NVL(self.options.library, "")
    # Application name
    d["app_viewname"] = util.NVL(self.options.appname, "Unknown")
    # (node, context, thread) => rank mapping
    d["use_rankmap"] = True
    d["rankmap"] = {}
    prof_rank_index = 0
    for p in sorted(self.profs,
                    cmp=lambda x, y: Registerer.triple_comparator(
                        util.filename2triple(x.filename),
                        util.filename2triple(y.filename))):
        triple = util.filename2triple(p.filename)
        triple_s = ".".join(triple)
        d["rankmap"][triple_s] = prof_rank_index
        prof_rank_index += 1
    if self.options.verbose >= 3:
        util.out("Infodic: ", d)
    ## Dictionary of all information
    self.infodic = d
def _prune_redundant_ids(all_ids):
    result = all_ids.copy()
    l = [list(x) for x in list(all_ids.values())]
    ll = [x for sublist in l for x in sublist]
    group_ids = Counter(ll)
    ut.out('keys: %d, values: %d...' % (len(all_ids.keys()), len(ll)))
    for i, (key, vals) in enumerate(all_ids.items()):
        if len(vals) > 1:
            redundant_ids = set([v for v in vals if group_ids[v] == 1])
            if len(redundant_ids) > 1:
                redundant_ids.remove(min(redundant_ids))
            for redundant_id in redundant_ids:
                result[key].remove(redundant_id)
    return result
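# Toy illustration (assumes the module's `ut` logger and Counter imports, as
# used above): ids shared across keys survive; ids unique to a single key are
# dropped, except that a key holding only unique ids keeps its smallest one.
example = {'a': {1, 2}, 'b': {1}, 'c': {5, 6}}
pruned = _prune_redundant_ids(example)
# pruned == {'a': {1}, 'b': {1}, 'c': {5}}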
def purge():
    ut.out('purging...')
    domains = ['adclicks', 'ifwe', 'twitter', 'youtube', 'soundcloud',
               'russia', 'toxic', 'yelp_hotel', 'yelp_restaurant']
    folders_to_purge = ['independent/data/%s/folds/*',
                        'independent/output/%s/predictions/*',
                        'relational/output/%s/experiments/*',
                        'relational/output/%s/predictions/*',
                        'relational/mrf/*',
                        'relational/psl/data/%s/*']
    for domain in domains:
        for folder in folders_to_purge:
            path = folder % domain if '%s' in folder else folder
            os.system('rm -rf %s' % path)
def __save_layouts(self):
    homelayout, homelayoutname = self.get_layout_number()
    layoutname = homelayoutname
    if homelayout == -1:
        out("No layouts to save. Create layouts with \":layout new\"")
        return False
    currentlayout = homelayout
    loop_exit_allowed = False
    while currentlayout != homelayout or not loop_exit_allowed:
        loop_exit_allowed = True
        sys.stdout.write("%s(%s); " % (currentlayout, layoutname))
        self.command_at(False,
                        'eval \'layout dump "%s"\' \'dumpscreen layout "%s"\' '
                        '\'layout next\'' % (
                            os.path.join(self.basedir, self.savedir,
                                         "layout_" + currentlayout + "_" + layoutname),
                            os.path.join(self.basedir, self.savedir,
                                         "winlayout_" + currentlayout + "_" + layoutname)))
        currentlayout, layoutname = self.get_layout_number()
    linkify(os.path.join(self.basedir, self.savedir),
            "layout_" + homelayout + "_" + homelayoutname, "last_layout")
    return True
def __move_all_windows(self, shift, group, kill=False):
    homewindow = int(self.homewindow)
    # create a wrap group for existing windows
    if not self.bNoGroupWrap:
        self.wrap_group_id = self.screen(
            '-t "%s" //group' % ('%s_%s' % (group, self.__unique_ident)))
        self.group(False, self.none_group, self.wrap_group_id)
    # move windows by shift and put them in a wrap group
    # for cwin, cgroupid, ctype, ctty in sc.gen_all_windows_fast(self.pid):
    for (cwin, cgroupid, cgroup, ctty, ctype, ctypestr, ctitle, cfilter,
         cscroll, ctime, cmdargs) in sc.gen_all_windows_full(
            self.pid, sc.require_dumpscreen_window(self.pid, True)):
        iwin = int(cwin)
        if iwin == homewindow:
            homewindow = iwin + shift
            self.homewindow = str(homewindow)
        if not self.bNoGroupWrap and cgroup == self.none_group:
            self.select(self.wrap_group_id)
            self.group(False, group, str(cwin))
        command = '%s -p %s -X number +%d' % (self.sc, cwin, shift)
        if not self.bNoGroupWrap and str(cwin) == str(self.wrap_group_id):
            out('Moving wrap group %s to %d' % (cwin, iwin + shift))
            self.wrap_group_id = str(iwin + shift)
        else:
            out('Moving window %s to %d' % (cwin, iwin + shift))
        os.system(command)
    self.select('%d' % homewindow)
def __save_layouts(self):
    (homelayout, homelayoutname) = self.get_layout_number()
    findir = sc.datadir
    if homelayout == -1:
        sys.stderr.write("No layouts to save.\n")
        return False
    path_layout = os.path.join(findir, "load_layout")
    oflayout = open(path_layout, "w")
    ex_lay = []
    for lay in sc.gen_layout_info(self, sc.dumpscreen_layout_info(self)):
        try:
            num = lay[0]
            title = lay[1]
        except:
            title = ""
        if self.excluded_layouts and (num in self.excluded_layouts or
                                      title in self.excluded_layouts):
            ex_lay.append(lay)
        else:
            sys.stdout.write("%s(%s); " % (num, title))
            oflayout.write('''layout select %s
layout dump "%s"
dumpscreen layout "%s"
''' % (num, os.path.join(findir, "layout_" + num),
       os.path.join(findir, "winlayout_" + num)))
    oflayout.write("layout select %s\n" % homelayout)
    oflayout.close()
    self.source(path_layout)
    util.remove(path_layout)
    linkify(findir, "layout_" + homelayout, "last_layout")
    if ex_lay:
        sys.stdout.write("\nExcluded layouts: %s" % str(ex_lay))
    out("")
    return True
def main():
    parser = OptionParser(usage="%prog patchconfig.py")
    # parser.add_option('-s', '--semantics', action='store_true',
    #                   default=False, dest='semantics', help='test semantics')
    opts, args = parser.parse_args()
    if len(args) != 1:
        parser.error("Invalid number of arguments")
    global cfg
    cfg = imp.load_source("cfg", args[0])
    out("Identifying differing files")
    exclusions = getattr(cfg, 'EXCLUSIONS', [])
    ignorebin = getattr(cfg, 'IGNORE_BINARY', False)
    diffFiles = getDiffFiles(cfg.ORIGDIR, cfg.DEVDIR, exclusions,
                             ignorebin=ignorebin)
    preparePatchDirectory(cfg.PATCHDIR)
    for oldfl, newfl in diffFiles:
        makePatch(oldfl, newfl, cfg.PATCHDIR, ignorebin=ignorebin)
def __restore_mru(self):
    try:
        mru = open(os.path.join(self.basedir, self.savedir, "mru"),
                   'r').read().strip().split(' ')
        mru.reverse()
        for win in mru:
            self.select("%s" % self.__wins_trans[win])
    except:
        out('Unable to restore MRU!')
    if self.restore_previous:
        self.select(self.homewindow)
    elif os.path.exists(os.path.join(self.basedir, self.savedir, "last_win")):
        # select last selected window
        last = os.readlink(os.path.join(self.basedir, self.savedir, "last_win"))
        (lasthead, lasttail) = os.path.split(last)
        lastid = lasttail.split("_", 1)[1]
        try:
            self.select(self.__wins_trans[lastid])
        except:
            self.select('-')
    else:
        self.select('-')
def print_cell(content, width, leftmargin=1):
    out(' ' * leftmargin)
    string = str(content)
    pad = (width - len(string)) * ' '
    if util.isnumber(content):
        out(pad + string)
    else:
        out(string + pad)
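# The alignment rule above, standalone: numbers are right-aligned within the
# cell width, everything else is left-aligned (print_cell itself relies on
# the module's out/util helpers).
def _cell(content, width):
    s = str(content)
    return s.rjust(width) if isinstance(content, (int, float)) else s.ljust(width)

row = ' ' + _cell('name', 10) + ' ' + _cell(3.14, 8)  # ' name           3.14'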
def __restore_mru(self):
    if self.enable_layout and not self.mru:
        pass
    else:
        try:
            if self.mru:
                sys.stdout.write("\nRestoring MRU windows order:")
            else:
                sys.stdout.write("\nSelecting last window:")
            mru_w = []
            ifmru = open(os.path.join(self.basedir, self.savedir, "mru"), 'r')
            for line in ifmru:
                n = line.strip()
                try:
                    nw = self.__wins_trans[n]
                    mru_w.append('select ' + nw + '\n')
                    sys.stdout.write(' %s' % nw)
                    if not self.mru:
                        break
                except:
                    if self.enable_layout:
                        mru_w.append('select -\n')
            ifmru.close()
            mru_w.reverse()
            path_mru_tmp = os.path.join(self.basedir, self.savedir, "mru_tmp")
            ofmru = open(path_mru_tmp, "w")
            ofmru.writelines(mru_w)
            ofmru.close()
            self.source(path_mru_tmp)
            util.remove(path_mru_tmp)
        except:
            sys.stderr.write(' Failed to load MRU.')
        out("")
def __setup_savedir(self, basedir, savedir):
    out("Setting up session directory %s" % savedir)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    f = open(os.path.join(basedir, self.blacklistfile), 'w')
    f.close()
    if os.path.exists(os.path.join(basedir, savedir)):
        out('Directory "%s" in "%s" already exists. Use --force to overwrite.'
            % (savedir, basedir))
        if self.force:
            out('forcing..')
            out('cleaning up "%s"' % savedir)
            map(os.remove, glob.glob(os.path.join(basedir, savedir, 'win_*')))
            map(os.remove, glob.glob(os.path.join(basedir, savedir, 'hardcopy.*')))
            map(os.remove, glob.glob(os.path.join(basedir, savedir, 'layout_*')))
            map(os.remove, glob.glob(os.path.join(basedir, savedir, 'winlayout_*')))
            return True
        else:
            out('Aborting.')
            return False
    else:
        os.makedirs(os.path.join(basedir, savedir))
        return True
def _add_profgroup(self):
    """Safely insert profgroup.
    If a record with the same profgroup condition exists, it only returns
    the id column of that; otherwise it inserts a new record and returns
    the id.
    @param self
    @return id of profgroup to insert into profexec table
    """
    sql_s = """SELECT id FROM profgroup
               WHERE application = ? AND nodes = ? AND procs = ?
               AND place = ? AND library = ?;"""
    pd = self.infodic
    print(pd)
    rtup = self.conn.select(sql_s,
                            (pd["soupdic"]["Executable"].encode("utf_8"),
                             pd["nodes"], pd["nproc"],
                             pd["place"].encode("utf_8"),
                             pd["library"].encode("utf_8")))
    if len(rtup) == 0:
        if self.options.verbose >= 1:
            util.out("No such profgroup. will newly insert...")
        pginsert = {
            "application": pd["soupdic"]["Executable"].encode("utf_8"),
            "app_viewname": pd["app_viewname"],
            "nodes": pd["nodes"],
            "procs": pd["nproc"],
            "place": pd["place"].encode("utf_8"),
            "library": pd["library"].encode("utf_8")}
        if self.options.verbose >= 3:
            util.out("new profgroup dict", pginsert)
        rdic = self.conn.insert("profgroup", pginsert)
        rt = rdic["id"]
        if self.options.verbose >= 1:
            util.out("New profgroup %d: %s" % (rt, pginsert))
    else:
        if self.options.verbose >= 1:
            util.out("Using existing profgroup ...")
        rt = rtup[0][0]
    return rt
def main(self):
    """Main function.
    @param self
    @todo allow specifying the library, etc.
    @todo allow specifying the DB file when using SQLite3
    @todo make -t run in test mode
    @todo add a Parp class, etc.
    """
    self.parse_opt()
    # Data Prepare
    logdir = self.args[0]
    funcmapfile = self.args[1]
    self.load_profs(logdir, funcmapfile)
    ## Unique nodes list
    self.nodeset = util.node_set(self.profs)
    # Prepare information to add
    self.prepare_registration()
    # DB prepare
    # self.conn = db.init("postgres", username="******", hostname="127.0.0.1")
    self.conn = db.init("sqlite3", dbfile="/home/kabe/Archives/prof.db")
    ### BEGIN TRANSACTION ###
    self.conn.begin_transaction()
    # Register
    try:
        # Profgroup
        group_id = self.add_profgroup()
        # ProfExec Insert
        profexec_id = self.add_profexec(group_id)
        util.out(group_id, profexec_id)
        # Profile Insert
        self.insert_profile(profexec_id)
    except Exception as e:
        util.err("Exception in main", repr(e))
        self.conn.rollback_transaction()
        raise  # Re-raise the exception
def knn_similarities(df, sim_thresh=0.8, n_neighbors=100,
                     approx_datapoints=120000, max_feats=None, in_col='text',
                     out_col='text_id', out_dir='', fname='sim.csv'):
    ut.makedirs(out_dir)
    ut.out('splitting data into manageable chunks...')
    dfs = _split_data(df, approx_datapoints=approx_datapoints, in_col=in_col)
    all_ids = defaultdict(set)
    group_id = 0
    for n, chunk_df in enumerate(dfs):
        ut.out('creating tf-idf matrix for chunk %d...' % n)
        groups = defaultdict(lambda: set())
        g_df = chunk_df.groupby(in_col).size().reset_index()
        strings = list(g_df[in_col])
        tf_idf_matrix = _tf_idf(strings, analyzer=_ngrams, max_feats=max_feats)
        nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(tf_idf_matrix)
        ut.out(str(tf_idf_matrix.shape))
        ut.out('querying/filtering each object for its closest neighbors...')
        for row in range(len(strings)):
            distances, indexes = nbrs.kneighbors(tf_idf_matrix.getrow(row))
            nbs = list(zip(distances[0], indexes[0]))
            nbs = [(d, i) for d, i in nbs if d <= sim_thresh]
            groups[group_id].update(set([i for d, i in nbs]))
            group_id += 1
        groups = _merge_identical_groups(groups)
        ids = _assign_ids_to_items(groups, strings)
        all_ids = _aggregate_identical_keys(all_ids, ids)
    all_ids = _prune_single_items(all_ids, df, in_col)
    all_ids = _prune_redundant_ids(all_ids)
    sim_df = _ids_to_dataframe(all_ids, df, in_col=in_col, out_col=out_col)
    sim_df.to_csv(out_dir + fname, index=None)
def load(self):
    if 'all' in self.force_start:
        self.primer_arg += 'S'
        self.force_start = []
    if 'all' in self.scroll:
        self._scrollfile = None
    out('session "%s" loading "%s"' %
        (self.pid, os.path.join(self.basedir, self.savedir)))
    # check if the saved session exists and get the biggest saved window
    # number and a number of saved windows
    maxnewwindow = 0
    newwindows = 0
    try:
        winlist = list(glob.glob(os.path.join(self.basedir, self.savedir,
                                              'win_*')))
        newwindows = len(winlist)
        out('%d new windows' % newwindows)
    except Exception as e:
        out('Unable to open.')
        out(str(e))
        return 1
def save(self):
    (self.homewindow, title) = self.get_number_and_title()
    out("\nCreating directories:")
    if not self.__setup_savedir(self.basedir, self.savedir):
        return 1
    sc.require_dumpscreen_window(self.pid, True)
    if self.enable_layout:
        out("\nSaving layouts:")
        self.__save_layouts()
    out("\nSaving windows:")
    self.__save_screen()
    out("\nCleaning up scrollbacks.")
    self.__scrollback_clean()
    if self.__vim_files:
        self.__wait_vim()
    return 0
def _significance(df, pred, samples=20):
    ref_auprs, pred_auprs = [], []
    ref_aurocs, pred_aurocs = [], []
    lc, rc = 'label', 'ref_pred'
    t1 = ut.out('computing aupr and auroc significance levels...')
    for i in range(samples):
        s_df = df.sample(frac=0.5, replace=True)
        ref_auprs.append(average_precision_score(s_df[lc], s_df[rc]))
        ref_aurocs.append(roc_auc_score(s_df[lc], s_df[rc]))
        pred_auprs.append(average_precision_score(s_df[lc], s_df[pred]))
        pred_aurocs.append(roc_auc_score(s_df[lc], s_df[pred]))
    auprs = np.subtract(ref_auprs, pred_auprs)
    aurocs = np.subtract(ref_aurocs, pred_aurocs)
    zeros = np.zeros(len(auprs))
    # use distinct names for the t statistics so the timer handle in t1
    # is not clobbered before ut.time(t1) is called
    stat1, aupr_pval = ttest_rel(auprs, zeros)
    stat2, auroc_pval = ttest_rel(aurocs, zeros)
    ut.time(t1)
    return aupr_pval, auroc_pval
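# The significance test above is a paired t-test of per-bootstrap metric
# differences against zero; the same idea standalone (illustrative numbers):
import numpy as np
from scipy.stats import ttest_rel

ref_metric = np.array([0.81, 0.79, 0.80, 0.82])
new_metric = np.array([0.84, 0.83, 0.85, 0.84])
diffs = ref_metric - new_metric
stat, pval = ttest_rel(diffs, np.zeros(len(diffs)))  # small pval => real gap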
def _approximations(df, relations=[]):
    t1 = ut.out('approximating relational with mean, max, median...')
    df = df.copy()
    con_obj = Connections()
    g, sgs = con_obj.find_subgraphs(df, relations, verbose=False)
    approx_dict = {}
    sg_list = []
    for i, sg in enumerate(sgs):
        if sg[3] > 0:  # num edges > 0
            sg_list.extend([(x, i) for x in sg[0]])  # give sg_id
    if len(sg_list) == 0:
        return approx_dict
    sg_df = pd.DataFrame(sg_list, columns=['com_id', 'sg_id'])
    df = df.merge(sg_df, how='left')
    df['sg_id'] = df['sg_id'].fillna(-1).apply(int)
    sg_mean = df.groupby('sg_id')['ind_pred'].mean().reset_index()\
        .rename(columns={'ind_pred': 'sg_mean_pred'})
    sg_median = df.groupby('sg_id')['ind_pred'].median().reset_index()\
        .rename(columns={'ind_pred': 'sg_median_pred'})
    sg_max = df.groupby('sg_id')['ind_pred'].max().reset_index()\
        .rename(columns={'ind_pred': 'sg_max_pred'})
    df = df.merge(sg_mean).merge(sg_median).merge(sg_max)
    filler = lambda x, c: x['ind_pred'] if x['sg_id'] == -1 else x[c]
    for col in ['sg_mean_pred', 'sg_median_pred', 'sg_max_pred']:
        cols = ['ind_pred', col, 'sg_id']
        df[col] = df[cols].apply(filler, axis=1, args=(col,))
    ut.time(t1)
    return df
def run_interpreter_tests(debug=False):
    tot = 0
    tot_ok = 0
    for flname in os.listdir(INTERPRETER_TEST_DIR):
        if os.path.splitext(flname)[1] != ".js":
            continue
        tot += 1
        flpath = os.path.join(INTERPRETER_TEST_DIR, flname)
        out(flname)
        outp = evaluate_file(flpath, debug)
        exppath = get_exp_path(flpath, '.exp')
        stat = validate_value(flpath, outp)
        if stat == 'match':
            tot_ok += 1
        out('%s %s\n' % (exppath, stat))
    out('%d of %d interpreter tests successful\n' % (tot_ok, tot))
def _split_data(df, approx_datapoints=120000, in_col='text'):
    delta = 100000000
    if len(df.groupby(in_col).size()) <= approx_datapoints:
        ut.out('found optimal num pieces: 1')
        return [df]
    for i in range(2, 1000):
        dps = []
        pieces = np.array_split(df, i)
        for piece in pieces:
            dps.append(len(piece.groupby(in_col).size()))
        mean_dps = np.mean(dps)
        ut.out('num pieces: %d, mean datapoints: %.2f' % (i, mean_dps))
        new_delta = np.abs(approx_datapoints - mean_dps)
        if new_delta < delta:
            delta = new_delta
        else:
            ut.out('found optimal num pieces: %d' % (i - 1))
            pieces = np.array_split(df, i - 1)
            return pieces
def _analyze(df, col, samples=100, relations=[]):
    gids = [r[2] for r in relations]
    if len(relations) == 0:
        return {}
    t1 = ut.out('computing messages missed most often...')
    p, r, ts = precision_recall_curve(df['label'], df[col])
    aupr = average_precision_score(df['label'], df[col])
    mp = 1.0 - aupr
    corrects = []
    step = int(len(ts) / 100) if len(ts) > 100 else 1
    for i in range(0, len(ts), step):
        t = ts[i]
        df['pred'] = np.where(df[col] > t, 1, 0)
        correct = df['pred'] == df['label']
        corrects.append(correct.apply(int))
    total_corrects = [sum(x) for x in zip(*corrects)]
    df['correct'] = total_corrects
    # extract bottom x% data
    df = df.sort_values('correct', ascending=False)
    ndx = len(df) - int(len(df) * mp)
    qf1, qf2 = df[ndx:], df[:ndx]
    qf1s = qf1[qf1['label'] == 1]  # low performers
    qf1o = qf1[qf1['label'] == 0]  # low performers
    qf2s = qf2[qf2['label'] == 1]  # high performers
    qf2o = qf2[qf2['label'] == 0]  # high performers
    ut.time(t1)
    t1 = ut.out('computing messages with a relation...')
    r1s, r1sf = _msgs_with_rel(qf1s, gids, mp, 'bot', 'spam')
    r1o, r1of = _msgs_with_rel(qf1o, gids, mp, 'bot', 'ham')
    r2s, r2sf = _msgs_with_rel(qf2s, gids, mp, 'top', 'spam')
    r2o, r2of = _msgs_with_rel(qf2o, gids, mp, 'top', 'ham')
    ut.time(t1)
    t1 = ut.out('computing messages with an outside relation...')
    rr1sof = _rm_in_sect(df, qf1s, qf2, gids, mp, r1s, 'bot', 'spam')
    rr1oof = _rm_in_sect(df, qf1o, qf2, gids, mp, r1o, 'bot', 'ham')
    rr2sof = _rm_in_sect(df, qf2s, qf1, gids, mp, r2s, 'top', 'spam')
    rr2oof = _rm_in_sect(df, qf2o, qf1, gids, mp, r2o, 'top', 'ham')
    sd = {}
    sd['bot_spam_rels'] = round(r1sf, 4)
    sd['bot_ham_rels'] = round(r1of, 4)
    sd['top_spam_rels'] = round(r2sf, 4)
    sd['top_ham_rels'] = round(r2of, 4)
    sd['bot_spam_rels_out'] = round(rr1sof, 4)
    sd['bot_ham_rels_out'] = round(rr1oof, 4)
    sd['top_spam_rels_out'] = round(rr2sof, 4)
    sd['top_ham_rels_out'] = round(rr2oof, 4)
    ut.time(t1)
    return sd
def cosine_similarities(df, sim_thresh=0.8, in_col='text', out_col='text_id',
                        approx_datapoints=120000, max_feats=None, k=5,
                        max_id=0, out_dir='', fname='sim.csv'):
    ut.makedirs(out_dir)
    group_id = max_id
    all_ids = defaultdict(set)
    dfs = _split_data(df, approx_datapoints=approx_datapoints, in_col=in_col)
    for n, chunk_df in enumerate(dfs):
        t1 = time.time()
        ut.out('\ncreating tf-idf matrix for chunk %d...' % (n + 1))
        groups = defaultdict(set)
        g_df = chunk_df.groupby(in_col).size().reset_index()
        strings = list(g_df[in_col])
        m = _tf_idf(strings, analyzer=_ngrams, max_feats=max_feats)
        v, total = len(m.data), m.shape[0] * m.shape[1]
        ut.out('sparsity: (%d/%d) %.2f%%' % (v, total, 100 * (v / total)))
        ut.out('computing cosine similarities...')
        cos_sim = cosine_similarity(m, dense_output=False)
        ut.out('filtering out similarities below threshold...')
        scm = cos_sim >= sim_thresh
        ut.out('putting matches into groups...')
        for ndx in range(len(strings)):
            data = cos_sim[ndx].data
            indices = list(cos_sim[ndx].indices)
            sims = [(x, data[indices.index(x)]) for x in scm[ndx].indices]
            sims = sorted(sims, key=lambda x: x[1], reverse=True)
            sim_ids = [sim_ndx for sim_ndx, sim_val in sims[:k]]
            groups[group_id].update(set(sim_ids))
            group_id += 1
        ut.out('merging identical groups...')
        groups = _merge_identical_groups(groups)
        ut.out('assigning ids to items...')
        ids = _assign_ids_to_items(groups, strings)
        ut.out('aggregating identical keys...')
        all_ids = _aggregate_identical_keys(all_ids, ids)
        ut.out('chunk time: %.4fm' % ((time.time() - t1) / 60.0))
    t1 = time.time()
    ut.out('\nprune single items...')
    all_ids = _prune_single_items(all_ids, df, in_col)
    ut.time(t1)
    t1 = time.time()
    ut.out('prune redundant ids...')
    all_ids = _prune_redundant_ids(all_ids)
    ut.time(t1)
    t1 = time.time()
    ut.out('putting ids into a dataframe...')
    sim_df = _ids_to_dataframe(all_ids, df, in_col=in_col, out_col=out_col)
    ut.out('writing to csv...', 0)
    sim_df.to_csv(out_dir + fname, index=None)
    ut.time(t1)
    ut.out()
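# A minimal standalone version of the tf-idf/cosine core (the code above uses
# its own _tf_idf/_ngrams helpers; here sklearn's built-in character n-grams
# stand in for them):
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

strings = ['free pills here', 'free pills now', 'nice photo!']
m = TfidfVectorizer(analyzer='char_wb', ngram_range=(3, 3)).fit_transform(strings)
sims = cosine_similarity(m, dense_output=False)  # sparse pairwise matrix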
if __name__ == "__main__":
    bot = Votebot()
    # Users to get the voting reward for
    usernames = get_lines(bot.conf["username_file"])
    # URL to the vote page of a server on minecraft-server.eu
    vote_urls = get_lines(bot.conf["vote_url_file"])
    bot.run(usernames, vote_urls)
    if bot.conf["use_timer"] == "True":
        while True:
            # calculate a randomized time for the next execution
            time_till_next_day = datetime.combine(
                datetime.now().date() + timedelta(days=1),
                datetime.strptime("0000", "%H%M").time()) - datetime.now()
            delay = time_till_next_day + timedelta(hours=random.randint(2, 23))
            out(f"Next execution in: {delay}")
            time.sleep(delay.seconds)
            bot.run(usernames, vote_urls)
sys.stdout.write("(shell)") extra_data_name = self.__save_shellvars(cwin, args[0].split("/")[-1]) cpids_data[i] = ( cpids_data[i][0], cpids_data[i][1], cpids_data[i][2], cpids_data[i][3], extra_data_name, ) scrollback_filename = os.path.join(findir, "hardcopy." + cwin) sys.stdout.write("%s %s; " % (cwin, ctypestr)) errors += self.__save_win(cwin, ctypestr, cpids_data, ctime, rollback) rollback = (None, None, None) out("") # remove ignored scrollbacks if "all" in self.scroll: for f in glob.glob(os.path.join(findir, "hardcopy.*")): open(f, "w") elif self.scroll: import tools (scroll_groups, scroll_wins) = tools.subwindows(self.pid, sc.datadir, self.scroll) out("Scrollback excluded groups: %s" % str(scroll_groups)) out("All scrollback excluded windows: %s" % str(scroll_wins)) for w in scroll_wins: util.remove(os.path.join(findir, "hardcopy.%s" % w))
def __load_layouts(self):
    cdinfo = map(int, self.dinfo()[0:2])
    out("Terminal size: %s %s" % (cdinfo[0], cdinfo[1]))
    homewindow = self.homewindow
    (homelayout, homelayoutname) = self.get_layout_number()
    layout_trans = {}
    layout_c = len(glob.glob(os.path.join(self.basedir, self.savedir,
                                          "winlayout_*")))
    if layout_c > 0:
        self.__layouts_loaded = True
    lc = 0
    layout_file = sc.layout_begin(self.pid)
    while lc < layout_c:
        filename = None
        try:
            filename = glob.glob(os.path.join(self.basedir, self.savedir,
                                              "layout_%d" % lc))[0]
            layoutnumber = filename.rsplit("_", 1)[1]
            (head, tail) = os.path.split(filename)
            # the winlayout_NUM files contain "dumpscreen layout" output
            # (see GNUScreen.Regions class)
            filename2 = os.path.join(head, "win" + tail)
            regions = sc.get_regions(filename2)
            status = self.get_layout_new(regions.title)
            if not status:
                sys.stderr.write(
                    "\nMaximum number of layouts reached. "
                    "Ignoring layout %s (%s).\n" % (layoutnumber, regions.title))
                break
            else:
                if self.exact:
                    self.layout("number %s" % layoutnumber, False)
                    currentlayout = layoutnumber
                else:
                    currentlayout = self.get_layout_number()[0]
                layout_trans[layoutnumber] = currentlayout
                sc.layout_select_layout(currentlayout)
                # source the output produced by "layout dump"
                sc.layout_load_dump(open(filename, "r"))
                regions_size = []
                winlist = []
                for (window, sizex, sizey) in regions.regions:
                    winlist.append(window)
                    regions_size.append((sizex, sizey))
                sc.layout_load_regions(regions, self.__wins_trans,
                                       cdinfo[0], cdinfo[1])
        except:
            layout_c += 1
            if layout_c > 2000:
                sys.stderr.write("\nErrors during layouts loading.\n")
                break
        lc += 1
    out("")
    if not lc == 0:
        # select last layout
        lastname = None
        lastid_l = None
        if homelayout != -1:
            out("Returning homelayout %s" % homelayout)
            layout_file.write("layout select %s" % homelayout)
        else:
            sys.stderr.write("No homelayout - unable to return.\n")
        if os.path.exists(os.path.join(self.basedir, self.savedir,
                                       "last_layout")) and len(layout_trans) > 0:
            last = os.readlink(os.path.join(self.basedir, self.savedir,
                                            "last_layout"))
            (lasthead, lasttail) = os.path.split(last)
            last = lasttail.split("_", 2)
            lastid_l = last[1]
            try:
                out("Selecting last layout: %s (%s)" %
                    (layout_trans[lastid_l], lastid_l))
                layout_file.write("layout select %s" % layout_trans[lastid_l])
                # layout numbering may change, use layout_trans={}
            except:
                sys.stderr.write("Unable to select last layout %s\n" % lastid_l)
    else:
        self.enable_layout = False
    sc.layout_end()
                        help='chunk size to use: %(default)s')
    parser.add_argument('-c', '--chunk', default=-1, type=int,
                        help='chunk to use: %(default)s')
    args = parser.parse_args()

    domain = args.domain
    info_type = args.info_type
    approx_datapoints = args.approx_datapoints
    sim_thresh = args.sim_thresh
    max_feats = int(args.max_feats) if args.max_feats is not None else None
    k = args.topk
    chunk_size = args.chunk_size
    chunk = args.chunk

    t = (domain, info_type, approx_datapoints, sim_thresh, k)
    ut.out('d: %s, i: %s, a: %d, s: %.2f, k: %d' % t)
    if max_feats is not None:
        ut.out(', m: %d' % max_feats, 0)

    in_dir = 'independent/data/' + domain + '/extractions/'
    out_dir = 'independent/data/' + domain + '/similarities/'

    df = pd.read_csv(in_dir + info_type + '.csv')
    fname = str(chunk) + '_' + info_type + '_sim.csv'
    df = retrieve_chunk(df, chunk_number=chunk, max_size=chunk_size)
    max_id = retrieve_max_id(out_dir, chunk_number=chunk, info_type=info_type)

    cosine_similarities(df, in_col=info_type, out_col=info_type + '_id',
                        out_dir=out_dir, fname=fname, max_feats=max_feats,
                        k=k, approx_datapoints=approx_datapoints,
def vote(self, driver, username, vote_url):
    # TODO set viewport depending on whether a mobile or desktop useragent is used
    self.set_viewport_size(driver, 1920, 1080)
    driver.get(vote_url)
    # time.sleep(5)  # Wait for the page to properly load
    try:
        # Accept TOS
        tos_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, 'qc-cmp-ui-content')))
        actions = ActionChains(driver)
        actions.move_to_element(tos_box).perform()
        tos_box.click()
        submit_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[1]/div/div/div[2]/button[2]")))
        submit_button.click()
    except TimeoutException:
        pass  # No TOS popup
    time.sleep(2)
    try:
        # We use .find_element_by_id here because we know the id
        text_input = driver.find_element_by_id("playername")
        time.sleep(10)
        text_input.click()
        # Then we'll fake typing into it
        text_input.send_keys(username)
        time.sleep(2)
    except NoSuchElementException:
        pass  # Users cannot receive rewards for voting
    # Now we can grab the submit button and click it
    submit_button = driver.find_element_by_id("captcha")
    submit_button.click()
    time.sleep(4)
    try:
        # Try to solve a captcha with the browser extension Buster
        driver.switch_to.frame(
            driver.find_element_by_xpath('//*[@title="recaptcha challenge"]'))
        time.sleep(3)
        buster_button = driver.find_element_by_xpath('//*[@id="solver-button"]')
        buster_button.click()
    except NoSuchElementException:
        pass  # No captcha
    # TODO Optimize the url check
    i = 0
    while True:
        if "success" in driver.current_url or "fail" in driver.current_url:
            current_url = driver.current_url
            break
        elif i == 5:
            out(f"Captcha failed for {username}")
            raise UnexpectedAlertPresentException
        i += 1
        time.sleep(1)
    if "success" in current_url:
        out(f"Voted successfully for {username}")
    elif "fail" in current_url:
        out(f"Couldn't vote for {username}")
    driver.close()
def _spread(df, col='ind_pred', relations=[]):
    """Gives some post-hoc test-set analysis; when running this, keep track
    of the test sets that improved using relational modeling, then average
    those test set statistics together to compare against the test sets
    that did not improve."""
    t1 = ut.out('computing subgraph statistics...')
    con_obj = Connections()
    gids = [r[2] for r in relations]
    g, sgs = con_obj.find_subgraphs(df, relations, verbose=False)
    spread_dict = {}
    sg_list = []
    for i, sg in enumerate(sgs):
        if sg[3] > 0:  # num edges > 0
            sg_list.extend([(x, i) for x in sg[0]])  # give sg_id
    if len(sg_list) == 0:
        return spread_dict
    sg_df = pd.DataFrame(sg_list, columns=['com_id', 'sg_id'])
    df = df.merge(sg_df, how='left')
    df['sg_id'] = df['sg_id'].fillna(-1).apply(int)
    p, r, ts = precision_recall_curve(df['label'], df[col])
    aupr = average_precision_score(df['label'], df[col])
    mp = 1.0 - aupr
    corrects = []
    step = int(len(ts) / 100) if len(ts) > 100 else 1
    for i in range(0, len(ts), step):
        t = ts[i]
        df['pred'] = np.where(df[col] > t, 1, 0)
        correct = df['pred'] == df['label']
        corrects.append(correct.apply(int))
    total_corrects = [sum(x) for x in zip(*corrects)]
    df['correct'] = total_corrects
    # extract bottom x% data
    df = df.sort_values('correct', ascending=False)
    ndx = len(df) - int(len(df) * mp)
    qfs = df[df['label'] == 1]
    qfo = df[df['label'] == 0]
    qf1, qf2 = df[ndx:], df[:ndx]
    qf1s = qf1[qf1['label'] == 1]  # low performers
    qf1o = qf1[qf1['label'] == 0]  # low performers
    qf2s = qf2[qf2['label'] == 1]  # high performers
    qf2o = qf2[qf2['label'] == 0]  # high performers
    spread_dict['spam_mean'] = round(qfs['ind_pred'].mean(), 4)
    spread_dict['spam_median'] = round(qfs['ind_pred'].median(), 4)
    spread_dict['ham_mean'] = round(qfo['ind_pred'].mean(), 4)
    spread_dict['ham_median'] = round(qfo['ind_pred'].median(), 4)
    for nm, temp_df in [('bot_spam', qf1s), ('bot_ham', qf1o),
                        ('top_spam', qf2s), ('top_ham', qf2o)]:
        wf = temp_df[(temp_df[gids] != -1).any(axis=1)]
        sg_mean = wf.groupby('sg_id')['ind_pred'].mean().reset_index()\
            .rename(columns={'ind_pred': 'sg_mean'})
        sg_std = wf.groupby('sg_id')['ind_pred'].std().reset_index()\
            .rename(columns={'ind_pred': 'sg_std'})
        sg_median = wf.groupby('sg_id')['ind_pred'].median().reset_index()\
            .rename(columns={'ind_pred': 'sg_median'})
        sg_min = wf.groupby('sg_id')['ind_pred'].min().reset_index()\
            .rename(columns={'ind_pred': 'sg_min'})
        sg_max = wf.groupby('sg_id')['ind_pred'].max().reset_index()\
            .rename(columns={'ind_pred': 'sg_max'})
        wf = wf.merge(sg_mean).merge(sg_std).merge(sg_median)\
            .merge(sg_min).merge(sg_max)
        wf['sg_spread'] = wf['sg_max'] - wf['sg_min']
        spread_dict[nm + '_sg_mean'] = round(np.mean(wf['sg_mean']), 4)
        spread_dict[nm + '_sg_std'] = round(np.mean(wf['sg_std']), 4)
        spread_dict[nm + '_sg_median'] = round(np.mean(wf['sg_median']), 4)
        spread_dict[nm + '_sg_min'] = round(np.mean(wf['sg_min']), 4)
        spread_dict[nm + '_sg_max'] = round(np.mean(wf['sg_max']), 4)
        spread_dict[nm + '_sg_spread'] = round(np.mean(wf['sg_spread']), 4)
    ut.time(t1)
    return spread_dict
def compute_big_aupr(start_fold=0, ref_start_fold=-1, num_folds=5,
                     domain='twitter', models=['ind'], in_dir='', gids=[]):
    ind_data_dir = 'independent/data/' + domain + '/'
    lines = {'ind': 'b-', 'mrf': 'g--', 'psl': 'm-.', 'mean': 'r:',
             'median': 'c:', 'max': 'y:'}
    inds, mrfs, psls, approxs, refs = [], [], [], [], []
    preds = []
    gen_obj = Generator()
    relations = _relations_for_gids(gids)

    for model in models:
        preds.append(model + '_pred')
    if 'approx' in models:
        models.remove('approx')
        models.extend(['mean', 'median', 'max'])
        preds.extend(['mean_pred', 'median_pred', 'max_pred'])
    preds = list(zip(models, preds))

    t1 = ut.out('reading true labels...', 0)
    full_df = pd.read_csv(ind_data_dir + 'comments.csv')
    lbl_df = full_df[['com_id', 'label']]
    ut.time(t1)

    s = '%s: reading model preds from fold %d to %d:'
    ut.out(s % (domain, start_fold, start_fold + num_folds - 1), 1)
    newline = 1 if 'approx' in models else 0

    d = {}
    for i, fold in enumerate(range(start_fold, start_fold + num_folds)):
        ut.out('\nreading preds for fold %d...' % i, newline)
        f_dict = {}

        if ref_start_fold > -1:
            ndx = ref_start_fold + i
            fname = in_dir + 'test_' + str(ndx) + '_preds.csv'
            assert os.path.exists(fname)
            refs.append(pd.read_csv(fname))

        if 'ind' in models:
            fname = in_dir + 'test_' + str(fold) + '_preds.csv'
            assert os.path.exists(fname)
            ind_df = pd.read_csv(fname)
            inds.append(ind_df)
            ind_lbl_df = full_df.merge(ind_df, on='com_id')
            t1 = ut.out('generating group ids...')
            for gid in gids:
                ind_lbl_df = gen_obj.gen_group_id(ind_lbl_df, gid)
            ut.time(t1)
            m_dict = _metrics(ind_lbl_df)
            a_dict = _analyze(ind_lbl_df, relations=relations, col='ind_pred')
            s_dict = _spread(ind_lbl_df, col='ind_pred', relations=relations)
            f_dict.update(a_dict)
            f_dict.update(s_dict)
            f_dict.update(m_dict)

        if 'mean' in models:
            temp_df = full_df.merge(ind_df)
            t1 = ut.out('generating group ids...')
            for gid in gids:
                temp_df = gen_obj.gen_group_id(temp_df, gid)
            ut.time(t1)
            approx_df = _approximations(temp_df, relations)
            approxs.append(approx_df)

        if 'mrf' in models:
            fname = in_dir + 'mrf_preds_' + str(fold) + '.csv'
            assert os.path.exists(fname)
            mrf_df = pd.read_csv(fname)
            mrfs.append(mrf_df)
            mrf_lbl_df = lbl_df.merge(mrf_df)
            m_dict = _metrics(mrf_lbl_df, col='mrf_pred', model='mrf')
            f_dict.update(m_dict)

        if 'psl' in models:
            fname = in_dir + 'psl_preds_' + str(fold) + '.csv'
            assert os.path.exists(fname)
            psl_df = pd.read_csv(fname)
            psls.append(psl_df)
            psl_lbl_df = lbl_df.merge(psl_df)
            m_dict = _metrics(psl_lbl_df, col='psl_pred', model='psl')
            f_dict.update(m_dict)

        d[i] = f_dict

    print(d)
    dicts = [d[i] for i in range(len(d))]
    stats_df = pd.DataFrame(dicts)
    stats_df = stats_df.reset_index().rename(columns={'index': 'test_set'})
    stats_df.to_csv('tw_full_0stk.csv', index=None)

    t1 = ut.out('concatenating test set predictions...')
    df = full_df[['com_id', 'label']]
    if 'ind' in models:
        ind_df = pd.concat(inds)
        df = df.merge(ind_df)
    if 'mean' in models:
        approx_df = pd.concat(approxs)
        assert set(ind_df['com_id']) == set(approx_df['com_id'])
        df = df.merge(approx_df)
    if ref_start_fold > -1:
        ref_df = pd.concat(refs)
        ref_df = full_df[['com_id', 'label']].merge(ref_df)
        ref_df = ref_df[['com_id', 'ind_pred']]
        ref_df = ref_df.rename(columns={'ind_pred': 'ref_pred'})
        assert set(ind_df['com_id']) == set(ref_df['com_id'])
        df = df.merge(ref_df)
    if 'mrf' in models:
        mrf_df = pd.concat(mrfs)
        assert set(ind_df['com_id']) == set(mrf_df['com_id'])
        df = df.merge(mrf_df)
    if 'psl' in models:
        psl_df = pd.concat(psls)
        assert set(ind_df['com_id']) == set(psl_df['com_id'])
        df = df.merge(psl_df)
    ut.time(t1)

    t1 = ut.out('applying noise to predictions...')
    noise = 0.000025
    perturb = lambda x: max(0.0, min(1.0, x + ran.uniform(-noise, noise)))
    if 'ind' in models:
        df['ind_pred'] = df['ind_pred'].apply(perturb)
    if 'mean' in models:
        df['mean_pred'] = df['mean_pred'].apply(perturb)
        df['median_pred'] = df['median_pred'].apply(perturb)
        df['max_pred'] = df['max_pred'].apply(perturb)
    if 'mrf' in models:
        df['mrf_pred'] = df['mrf_pred'].apply(perturb)
    if 'psl' in models:
        df['psl_pred'] = df['psl_pred'].apply(perturb)
    ut.time(t1)

    # compute reference aupr and auroc
    ref_label, ref_pred = df['label'], df['ref_pred']
    ref_aupr = average_precision_score(ref_label, ref_pred)
    ref_auroc = roc_auc_score(ref_label, ref_pred)
    ref_p, ref_r, ref_t = precision_recall_curve(ref_label, ref_pred)
    ref_fpr, ref_tpr, ref_t2 = roc_curve(ref_label, ref_pred)
    ut.out('%s aupr: %.4f, auroc: %.4f' % ('reference', ref_aupr, ref_auroc))
    ut.plot_pr_curve('ref', ref_p, ref_r, ref_aupr, domain=domain,
                     line='k-', show_legend=True)
    ut.plot_roc_curve('ref', ref_tpr, ref_fpr, ref_auroc, domain=domain,
                      line='k-', show_legend=True)
    auroc_pval, aupr_pval = 0, 0

    # compute combined test set curves
    for i, (model, pred) in enumerate(preds):
        aupr = average_precision_score(df['label'], df[pred])
        auroc = roc_auc_score(df['label'], df[pred])
        p, r, _ = precision_recall_curve(df['label'], df[pred])
        fpr, tpr, _ = roc_curve(df['label'], df[pred])
        # aupr_pval, auroc_pval = _significance(df, pred)
        t = (model, aupr, aupr_pval, auroc, auroc_pval)
        ut.out('%s aupr: %.4f (%.4f), auroc: %.4f (%.4f)' % t)
        save = True if i == len(preds) - 1 else False
        ut.plot_pr_curve(model, p, r, aupr, domain=domain,
                         line=lines[model], show_legend=True)
        ut.plot_roc_curve(model, tpr, fpr, auroc, save=save, domain=domain,
                          line=lines[model], show_legend=True)
    ut.out()