Example #1
0
def makePatch(oldfl, newfl, patchdir, ignorebin=False):
  # No patches for symbolic links.
  if os.path.islink(newfl):
    return False

  cmd = ['diff', '-Nu', oldfl, newfl]
  if not ignorebin:
    # -a treats all files as text, so binary files still produce a diff.
    cmd.append('-a')

  assert newfl.startswith(cfg.DEVDIR + '/')
  patchpath = newfl[len(cfg.DEVDIR)+1:]
  outfile = '_'.join(patchpath.split('/')) + ".patch"
  outpath = os.path.join(patchdir, outfile)

  proc = subprocess.Popen(cmd, stdout=PIPE)
  diffout = proc.communicate()[0]
  
  if MAJOR >= 3:
    enc = sys.stdout.encoding
    if enc is None: enc = 'utf-8'
    diffout = diffout.decode(enc)

  if ignorebin and diffout.startswith('Binary files '):
    return False

  out("Saving patch for %s to %s" % (newfl, outpath))
  with open(outpath, 'w') as outfl:
    outfl.write(diffout)
    outfl.write("\n")

  return True
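A minimal standalone sketch of the same diff-to-patch idea, assuming only the standard library (cfg, MAJOR, and out above belong to the surrounding project):

import os
import subprocess

def make_patch_sketch(oldfl, newfl, patchdir):
    # Hypothetical helper: write a unified diff of oldfl vs. newfl.
    if os.path.islink(newfl):
        return False
    # diff exits 1 when files differ; without check=True that's not an error.
    diffout = subprocess.run(['diff', '-Nu', '-a', oldfl, newfl],
                             stdout=subprocess.PIPE, text=True).stdout
    outpath = os.path.join(patchdir, os.path.basename(newfl) + '.patch')
    with open(outpath, 'w') as fh:
        fh.write(diffout)
    return True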
Example #2
0
    def add_profexec(self, group_id):
        """Insert a record of profexec table.

        @param self
        @param group_id id of profgroup table
        @return the id of profexec table
        """
        exec_time = self.infodic["exec_time"]
        start_ts = self.infodic["start_ts"]
        i_dic = {
            "profgroup_id": group_id,
            "exec_time": exec_time,
            "start_ts": start_ts}
        sql_s = """SELECT id FROM profexec
                   WHERE profgroup_id = ?
                     AND start_ts = ?;"""
        # In Python 2, a large integer's repr() carries an "L" suffix; str() does not
        rtup = self.conn.select(sql_s, (group_id, str(start_ts)))
        if len(rtup) == 0:
            if self.options.verbose >= 1:
                util.out("No such profexec. will newly insert...")
            rdic = self.conn.insert("profexec", i_dic)
            exec_id = rdic["id"]
            if self.options.verbose >= 2:
                util.out("new exec id", exec_id)
            return exec_id
        else:
            raise Exception("Same Profexec exists, aborting")
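The select-then-insert pattern above, sketched with the stdlib sqlite3 module; the schema and names here are illustrative stand-ins for the project's own conn wrapper:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE profexec (id INTEGER PRIMARY KEY, '
             'profgroup_id INTEGER, start_ts INTEGER)')

def add_profexec_sketch(conn, group_id, start_ts):
    rows = conn.execute('SELECT id FROM profexec WHERE profgroup_id = ? '
                        'AND start_ts = ?', (group_id, start_ts)).fetchall()
    if rows:
        raise Exception('Same Profexec exists, aborting')
    cur = conn.execute('INSERT INTO profexec (profgroup_id, start_ts) '
                       'VALUES (?, ?)', (group_id, start_ts))
    return cur.lastrowid

print(add_profexec_sketch(conn, 1, 1234567890))  # -> 1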
Example #3
0
 def __setup_savedir(self, basedir, savedir):
     out('Setting up session directory "%s"' % savedir)
     if not os.path.exists(basedir):
         os.makedirs(basedir)
         f = open(os.path.join(basedir, self.blacklistfile), "w")
         f.close()
     return True
Example #4
0
    def _string_ids(self, df, g_id, regex=r'(#\w+)', data_dir=None):
        fp = ''
        if data_dir is not None:
            hash_path = data_dir + 'hashtag_sim.csv'
            ment_path = data_dir + 'mention_sim.csv'
            link_path = data_dir + 'link_sim.csv'

            if regex == r'(#\w+)':
                fp = hash_path
            elif regex == r'(@\w+)':
                fp = ment_path
            elif regex == r'(http[^\s]+)':
                fp = link_path

        if data_dir is not None and os.path.exists(fp):
            ut.out('reading sim file...', 0)
            r_df = pd.read_csv(fp)
            r_df = r_df[r_df['com_id'].isin(df['com_id'])]
            g_df = r_df.groupby(g_id).size().reset_index()
            g_df = g_df[g_df[0] > 1]
            r_df = r_df[r_df[g_id].isin(g_df[g_id])]

        else:
            group = g_id.replace('_id', '')
            regex = re.compile(regex)
            inrel = []

            for _, row in df.iterrows():
                s = self._get_items(row.text, regex)
                inrel.append({'com_id': row.com_id, group: s})

            inrel_df = pd.DataFrame(inrel).drop_duplicates()
            inrel_df = inrel_df[inrel_df[group] != '']
            r_df = self._cols_to_ids(inrel_df, g_id=g_id, cols=[group])
        return r_df
Example #5
0
 def __create_win(self,keep_numbering,wins_trans,pid,hostgroup,rootgroup,win,time,group,type,title,filter,scrollback_len,processes):
     if keep_numbering:
         winarg=win
     else:
         winarg=""
     
     if type[0]=='b':
         if win in self.force_start:
             primer_arg=self.primer_arg+'S'
         else:
             primer_arg=self.primer_arg
         if win in self.scroll or not self._scrollfile or not os.path.exists(os.path.join(self.homedir,self.projectsdir,self._scrollfile+win)):
             scrollfile='0'
         else:
             scrollfile=self._scrollfile+win
         #print ('-h %s -t \"%s\" %s %s %s %s %s %s' % (scrollback_len,title,winarg,self.primer,primer_arg,self.projectsdir, scrollfile,os.path.join(self.savedir,"win_"+win)))
         self.screen('-h %s -t \"%s\" %s %s %s %s %s %s' % (scrollback_len,title,winarg,self.primer,primer_arg,self.projectsdir, scrollfile,os.path.join(self.savedir,"win_"+win)) )
         #self.screen('-h %s -t \"%s\" %s %s %s %s %s %s' % (scrollback_len,title,winarg,self.primer,primer_arg,self.projectsdir,"0",os.path.join(self.savedir,"win_"+win)) )
     elif type[0]=='g':
         self.screen('-t \"%s\" %s //group' % (title,winarg ) )
     else:
         out ('%s Unknown window type "%s". Ignoring.'%(win,type))
         return -1
    
     newwin = self.number()
     return newwin
Example #6
0
 def __setup_savedir(self, basedir, savedir):
     out("Setting up session directory \"%s\"" % savedir)
     if not os.path.exists(basedir):
         os.makedirs(basedir)
         f = open(os.path.join(basedir, self.blacklistfile), "w")
         f.close()
     return True
Example #7
0
    def __move_all_windows(self,shift,group,kill=False):
        homewindow=int(self.homewindow)
        # create wrap group for existing windows
        if not self.bNoGroupWrap:
            self.screen('-t \"%s\" //group' % ('%s_%s'%(group,self.__unique_ident)) )
            self.group(False,self.none_group)
            self.wrap_group_id=self.number()

        # move windows by shift and put them in a wrap group
        #for cwin,cgroupid,ctype,ctty in sc.gen_all_windows_fast(self.pid):
        for cwin,cgroupid,cgroup,ctty,ctype,ctypestr,ctitle,cfilter,cscroll,ctime in sc.gen_all_windows_full(self.pid):
            iwin=int(cwin)
            if iwin==homewindow:
                homewindow=iwin+shift
                self.homewindow=str(homewindow)
            
            cgroupid,cgroup = self.get_group(cwin)
            if not self.bNoGroupWrap and cgroup==self.none_group:
                self.select(self.wrap_group_id)
                self.group(False,group,str(cwin))
            command='%s -p %s -X number +%d' % (self.sc,cwin,shift)
            if not self.bNoGroupWrap and str(cwin)==str(self.wrap_group_id):
                out('Moving wrap group %s to %d'%(cwin,iwin+shift))
                self.wrap_group_id=str(iwin+shift)
            else:
                out('Moving window %s to %d'%(cwin,iwin+shift))
            os.system(command)
        self.select('%d'%(homewindow))
Example #8
0
 def __rollback(self,cmdline):
     try:
         cmdline=cmdline.split('\0')
         if cmdline[3]=='0':
             requireme(self.homedir,cmdline[2], cmdline[4])
         else:
             requireme(self.homedir,cmdline[2], cmdline[3])
         path=os.path.join(self.homedir,cmdline[2],cmdline[4])
         fhead,ftail=os.path.split(cmdline[4])
         target=os.path.join(self.homedir,self.projectsdir,self.savedir,ftail+'__rollback')
         number=ftail.split('_')[1]
         oldsavedir=fhead
         
         # import win_* files from previous savefiles
         try:
             shutil.move(os.path.join(self.homedir,cmdline[2],cmdline[4]),target)
         except Exception as e:
             out(str(e))
             target=None
             pass
         
         # import hardcopy.* files from previous savefiles
         fhead,ftail=os.path.split(cmdline[3])
         target2=os.path.join(self.homedir,self.projectsdir,self.savedir,ftail+'__rollback')
         try:
             shutil.move(os.path.join(self.homedir,cmdline[2],cmdline[3]),target2)
         except Exception as e:
             #out(str(e))
             target2=None
             pass
Example #9
0
    def find_subgraphs(self, df, relations, max_size=40000, verbose=False):
        if verbose:
            t1 = ut.out('finding subgraphs...')

        if verbose:
            t1 = ut.out('building networkx graph...')
        g = self.build_networkx_graph(df, relations)
        ccs = list(nx.connected_components(g))
        if verbose:
            ut.time(t1)

        if verbose:
            t1 = ut.out('processing connected components...')
        subgraphs = self._process_components(ccs, g)
        if verbose:
            ut.time(t1)

        # t1 = ut.out('filtering redundant subgraphs...')
        # subgraphs = self._filter_redundant_subgraphs(subgraphs, df)
        # ut.time(t1)

        # t1 = ut.out('removing single edge hubs...')
        # subgraphs = self._remove_single_edge_hubs(subgraphs, g)
        # ut.time(t1)

        # t1 = ut.out('compiling single node subgraphs...')
        # subgraphs += self._single_node_subgraphs(subgraphs, df, max_size)
        # ut.time(t1)

        if verbose:
            self._print_subgraphs_size(subgraphs)
        return g, subgraphs
Example #10
0
def run_benchmarks(debug=False, overwrite=False, refine=None, synonly=False, service=False, apps=None):
  results = RunResults('benchmarks', overwrite)
  if refine is None:
    refine = 0

  cases = load_app_sources(BENCHMARK_DIR, defwarn=True, apps=apps)
  apps = list(cases.keys())
  apps.sort()
  for appname in apps:
    inps = cases[appname]
    srcfls = inps[0]
    poldict = inps[1]
    seeds = inps[2]
    apppath = inps[3]
    opts = inps[4]

    opts.append('-N')
    opts.append(appname)
    if synonly:
      opts.append('-z')

    if appname in LARGE_BENCHMARKS:
      # Forgo interprocedural analysis for these benchmarks.
      opts.append('-P')

    # Run with each policy file separately.
    if MAJOR >= 3: politems = poldict.items()
    else: politems = poldict.iteritems()
    for poldesc, polfiles in politems:
      result = RunResult(False, False)
      results.add(result)

      out('Analyzing %s' % appname)
      if service:
        outp, errp = query_jam_service(srcfls, polfiles, refine=refine, seeds=seeds, moreopts=opts)
      else:
        outp, errp = run_jam(srcfls, polfiles, refine=refine, debug=debug, seeds=seeds, moreopts=opts)
      
      # Error case, message printed in |run_jam|.
      if outp is None: continue

      refsuf = get_suffix(synonly, refine, poldesc)

      expfile = '%s.%s.out.js' % (appname, refsuf)
      exppath = os.path.join(apppath, expfile)
      result.js_ok = process_result(outp, exppath, overwrite)

      infopath = get_info_path(errp)
      if infopath is None:
        err('Could not determine info path: %s\n' % appname)
        err('ERRP: %s' % errp)
        continue

      infoexpfile = '%s.%s.info.txt' % (appname, refsuf)
      infoexppath = os.path.join(apppath, infoexpfile)
      result.info_ok = process_info(infopath, infoexppath, overwrite)

      sys.stderr.write('\n')

  results.printSummary()
Example #11
0
def run_tx_tests(case=None, debug=False, jscmd=JS_COMMAND, moreopts=[]):
  tot = 0
  tot_ok = 0
  start = time.time()

  testcases = load_testcases(JAMSCRIPT_TESTDIR, None, filter=case)

  for inps in testcases:
    tot += 1
    jspath = inps[0]
    policies = inps[1]
    outp = run_tx(jspath, policies, perf=debug, debug=debug, jscmd=jscmd)
    exppath = os.path.splitext(jspath)[0] + '.exp'
    stat = validate_output(outp, exppath)
    if stat == 'match':
      tot_ok += 1
    jsname = os.path.basename(jspath)

    if debug:
      sys.stdout.write(outp)
    if debug or stat != 'match':
      out('%s %s' % (jsname, stat))

  end = time.time()
  tottime = end - start

  vals = (tot_ok, tot, tottime)
  out('%d of %d transaction tests successful; %.2fs\n' % vals)
Example #12
0
    def load(self):
        if 'all' in self.force_start:
            self.primer_arg += 'S'
            self.force_start = []
        if 'all' in self.scroll:
            self._scrollfile = None
        out('session "%s" loading "%s"' % (self.pid, os.path.join(self.basedir,
            self.savedir)))

        #check if the saved session exists and get the biggest saved window number and a number of saved windows

        maxnewwindow = 0
        newwindows = 0
        try:
            winlist = list(glob.glob(os.path.join(self.basedir, self.savedir,
                           'win_*')))
            newwindows = len(winlist)
            out('%d new windows' % newwindows)
        except Exception:
            sys.stderr.write('Unable to open winlist.\n')
            return 1

        # keep original numbering, move existing windows

        self.homewindow = self.number()

        if self.exact:
            maxnewwindow = -1
            for w in winlist:
                try:
                    w = int(w.rsplit("_", 1)[1])
                    if w > maxnewwindow:
                        maxnewwindow = w
                except:
                    pass

            out('Biggest new window number: %d' % maxnewwindow)
            if self.enable_layout:
                self.__remove_all_layouts()
            self.__move_all_windows(maxnewwindow + 1, self.group_other,
                                    False)

        out("\nLoading windows:")
        self.__load_screen()

        if self.enable_layout:
            out("\nLoading layouts:")
            try:
                self.__load_layouts()
            except:
                sys.stderr.write('Layouts loading failed!\n')
                # raise

        self.__restore_mru()
        sc.cleanup()

        return 0
Example #13
0
    def _print_subgraphs_size(self, subgraphs):
        tot_m, tot_h, tot_e = 0, 0, 0

        for ids, hubs, rels, edges in subgraphs:
            tot_m += len(ids)
            tot_h += len(hubs)
            tot_e += edges

        t = (len(subgraphs), tot_m, tot_h, tot_e)
        ut.out('subgraphs: %d, msgs: %d, hubs: %d, edges: %d' % t)
Example #14
0
    def __load_screen(self):
        homewindow = self.homewindow

        # out ("Homewindow is " +homewindow)

        #check if target Screen is currently in some group and set hostgroup to it

        (hostgroupid, hostgroup) = self.get_group(homewindow)
        rootwindow = self.number()
        if self.exact:
            rootgroup = self.none_group
            hostgroup = self.none_group
        elif self.bNoGroupWrap:
            rootgroup = self.none_group
        else:

            #create a root group and put it into host group

            rootgroup = "RESTORE_" + self.savedir
            rootwindow = self.screen('-t \"%s\" %s //group' % (rootgroup, 0))
            self.group(False, hostgroup, rootwindow)

        out("restoring Screen session inside window %s (%s)" % (rootwindow,
            rootgroup))

        self.command_at(True, 'setenv SCREENSESSION %s' % os.path.join(self.basedir, self.savedir) )

        wins = []
        for id in range(0, int(self.MAXWIN_REAL)):
            try:
                filename = os.path.join(self.basedir, self.savedir,
                        "win_" + str(id))
                if os.path.exists(filename):
                    f = open(filename)
                    win = list(f)[0:9]
                    f.close()
                    win = [x.strip() for x in win]
                    try:
                        nproc = win[8]
                    except:
                        nproc = '0'
                    wins.append((
                        win[0],
                        win[1],
                        win[2],
                        win[3],
                        self.__escape_bad_chars(win[4]),
                        win[5],
                        win[6],
                        win[7],
                        nproc,
                        ))
            except Exception as x:
                sys.stderr.write('%d Unable to load window ( %s )\n' %
                        (id, str(x)))
Example #15
0
    def gen_relational_ids(self, df, relations, data_dir=None, exact=True):
        """Generates relational ids for a given dataframe."""
        df = df.copy()

        for relation, group, group_id in relations:
            ut.out(relation + '...')
            if exact:
                df = self._gen_group_id(df, group_id)
            else:
                df = self._gen_group_id_lists(df, group_id, data_dir=data_dir)
        return df
Example #16
0
    def load(self):
        if "all" in self.force_start:
            self.primer_arg += "S"
            self.force_start = []
        if "all" in self.scroll:
            self._scrollfile = None
        out('session "%s" loading "%s"' % (self.pid, os.path.join(self.basedir, self.savedir)))

        # check if the saved session exists and get the biggest saved window number and a number of saved windows

        maxnewwindow = 0
        newwindows = 0
        try:
            winlist = list(glob.glob(os.path.join(self.basedir, self.savedir, "win_*")))
            newwindows = len(winlist)
            out("%d new windows" % newwindows)
        except Exception:
            sys.stderr.write("Unable to open winlist.\n")
            return 1

        # keep original numbering, move existing windows

        self.homewindow = self.number()

        if self.exact:
            maxnewwindow = -1
            for w in winlist:
                try:
                    w = int(w.rsplit("_", 1)[1])
                    if w > maxnewwindow:
                        maxnewwindow = w
                except:
                    pass

            out("Biggest new window number: %d" % maxnewwindow)
            if self.enable_layout:
                self.__remove_all_layouts()
            self.__move_all_windows(maxnewwindow + 1, self.group_other, False)

        out("\nLoading windows:")
        self.__load_screen()

        if self.enable_layout:
            out("\nLoading layouts:")
            try:
                self.__load_layouts()
            except:
                sys.stderr.write("Layouts loading failed!\n")
                # raise

        self.__restore_mru()
        sc.cleanup()

        return 0
Example #17
0
def retrieve_chunk(df, max_size=5000000, chunk_number=0):
    if chunk_number == -1:
        return df

    for i in range(2, 50):
        ut.out('splitting into %d chunks...' % i)
        dfs = np.array_split(df, i)

        if len(dfs[0]) <= max_size:
            ut.out('return chunk %d...' % chunk_number)
            return dfs[chunk_number]
    return df
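The loop above leans on np.array_split tolerating uneven lengths; a toy run of the same idea with a plain array and a made-up size cap:

import numpy as np

arr = np.arange(10)
for i in range(2, 50):
    chunks = np.array_split(arr, i)
    if len(chunks[0]) <= 3:  # stand-in for max_size
        print('using %d chunks, first has %d items' % (i, len(chunks[0])))
        break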
Example #18
0
def process_result(outp, exppath, overwrite):
  ok = False
  if overwrite:
    stat = overwrite_expected(outp, exppath)
    if stat == 'overwritten' or stat == 'created':
      ok = True
  else:
    stat = validate_output(outp, exppath)
    if stat == 'match':
      ok = True
  expname = os.path.basename(exppath)
  out('%s %s' % (expname, stat))
  return ok
Example #19
0
 def run(self, usernames, vote_urls):
     for vote_url in vote_urls:
         out(f"Voting for the url {vote_url}")
         for username in usernames:
             driver = self.init_driver()
             self.install_ext(driver)
             while True:
                 try:
                     self.vote(driver, username, vote_url)
                     break
                 except UnexpectedAlertPresentException:
                     # Captcha Error
                     out(f"Retrying to vote for {username}")
                     continue
Example #20
0
    def _text_ids(self, df, g_id, data_dir=None):
        fp = None if data_dir is None else data_dir + 'text_sim.csv'

        if data_dir is not None and os.path.exists(fp):
            ut.out('reading sim file...', 0)
            r_df = pd.read_csv(fp)
            r_df = r_df[r_df['com_id'].isin(df['com_id'])]
            g_df = r_df.groupby(g_id).size().reset_index()
            g_df = g_df[g_df[0] > 1]
            r_df = r_df[r_df[g_id].isin(g_df[g_id])]
        else:
            df = df[df['text'] != '']
            r_df = self._cols_to_ids(df, g_id=g_id, cols=['text'])
        return r_df
Example #21
0
    def consolidate(self, subgraphs, max_size=40000, div=2):
        """Combine subgraphs into larger sets to reduce total number of
        subgraphs to do inference over."""
        t1 = ut.out('consolidating subgraphs...')

        sgs = []
        new_ids, new_hubs = set(), set()
        new_rels, new_edges = set(), 0

        for ids, hubs, rels, edges in subgraphs:
            size = int(len(new_ids) / div) + int(len(ids) / div)
            size += new_edges + edges

            if size < max_size:  # keep adding to new
                new_ids.update(ids)
                new_rels.update(rels)
                new_hubs.update(hubs)
                new_edges += edges
            elif new_edges == 0 and size > max_size:  # subgraph too big
                new_ids.update(ids)
                new_hubs.update(hubs)
                new_rels.update(rels)
                new_edges += edges
            else:  # new is full
                sgs.append((new_ids, new_hubs, new_rels, new_edges))
                new_ids, new_hubs = ids, hubs
                new_rels, new_edges = rels, edges

        if len(new_ids) > 0:
            sgs.append((new_ids, new_hubs, new_rels, new_edges))

        ut.time(t1)
        self._print_subgraphs_size(sgs)

        return sgs
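A toy run of the greedy packing idea above, simplified by dropping the div scaling and the oversize special case; the (ids, hubs, rels, edges) tuples mirror find_subgraphs:

subgraphs = [({1, 2}, set(), set(), 5),
             ({3}, set(), set(), 30),
             ({4, 5}, set(), set(), 8)]

packed, cur = [], (set(), set(), set(), 0)
for ids, hubs, rels, edges in subgraphs:
    if cur[3] + edges < 20:  # stand-in for max_size
        cur = (cur[0] | ids, cur[1] | hubs, cur[2] | rels, cur[3] + edges)
    else:
        packed.append(cur)
        cur = (ids, hubs, rels, edges)
if cur[0]:
    packed.append(cur)
print(len(packed), 'consolidated subgraphs')  # -> 3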
Example #22
0
    def __scrollback_clean(self):
        '''clean up scrollback files: remove empty lines at the beginning and at the end of a file'''
        for f in glob.glob(os.path.join(self.basedir,self.savedir,'hardcopy.*')):
            try:
                ftmp=f+"_tmp"
                temp=open(ftmp,'w')
                thefile = open(f,'r')
                beginning=True
                for line in thefile:
                    if beginning:
                        if line == '\n':
                            line = line.replace('\n','')
                        else:
                            beginning=False
                    temp.write(line)
                temp.close()
                thefile.close()

                temp = open( ftmp, 'r' )
                endmark=-1
                lockmark=False
                for i,line in enumerate(temp):
                    if line == '\n':
                        if not lockmark:
                            endmark=i
                            lockmark=True
                    else:
                        endmark=-1
                        lockmark=False
                temp.close()

                if endmark > 1:
                    thefile = open(f , 'w')
                    temp=open(ftmp,'r')
                    for i,line in enumerate(temp):
                        if i == endmark:
                            break
                        else:
                            thefile.write(line)
                    thefile.close()
                    temp.close()
                    util.remove(ftmp)
                else:
                    util.remove(f)
                    os.rename(ftmp,f)
            except:
                out ('Unable to clean scrollback file: '+f)
Example #23
0
  def printSummary(self):
    self.end = time.time()
    tottime = self.end - self.start

    tot = 0
    js_tot = 0
    js_ok = 0
    info_tot = 0
    info_ok = 0
    html_tot = 0
    html_ok = 0
    for res in self.results:
      assert isinstance(res, RunResult)
      tot += 1
      if res.js_ok is not None:
        js_tot += 1
        if res.js_ok: js_ok += 1
      if res.info_ok is not None:
        info_tot += 1
        if res.info_ok: info_ok += 1
      if res.html_ok is not None:
        html_tot += 1
        if res.html_ok: html_ok += 1

    if self.overwrite:
      action = 'overwrote'
    else:
      action = 'verified'

    txts = []
    if js_tot > 0:
      jstxt = '%d/%d JS output' % (js_ok, js_tot)
      txts.append(jstxt)
    if info_tot > 0:
      infotxt = '%d/%d info files' % (info_ok, info_tot)
      txts.append(infotxt)
    if html_tot > 0:
      htmltxt = '%d/%d HTML output' % (html_ok, html_tot)
      txts.append(htmltxt)
    
    if len(txts) > 0:
      restxt = '%s %s' % (action, ', '.join(txts))
    else:
      restxt = 'no results'

    out('%s for %d %s; %.2fs\n' % (restxt, tot, self.desc, tottime))
Example #24
0
def plot_distributions(df, feats=[]):

    dfs = df[df['label'] == 1]
    dfh = df[df['label'] == 0]

    for feat in feats:
        ut.out('plotting distribution for: %s\n' % feat, 0)
        f, ax = plt.subplots(1, 1)
        # normed= and mlab.normpdf were removed from matplotlib; use
        # density=True and scipy's norm.pdf instead.
        ns, bs, ps = ax.hist(dfs[feat], density=True, color='r', alpha=0.69)
        nh, bh, ph = ax.hist(dfh[feat], density=True, color='b', alpha=0.69)
        ms, ss = norm.fit(dfs[feat])
        mh, sh = norm.fit(dfh[feat])
        ys = norm.pdf(bs, ms, ss)
        yh = norm.pdf(bh, mh, sh)
        ax.plot(bs, ys, 'r--')
        ax.plot(bh, yh, 'b--')
        f.savefig(feat + '.pdf', format='pdf', bbox_inches='tight')
        plt.clf()
Example #25
0
    def save(self):
        self.homewindow,title=self.get_number_and_title()
        out("\nCreating directories:")
        if not self.__setup_savedir(self.basedir,self.savedir):
            return 1

        if self.enable_layout:
            out("\nSaving layouts:")
            self.homewindow_last,title=self.get_number_and_title()
            self.__save_layouts()
            out("")

        out("\nSaving windows:")
        self.__save_screen()
        
        out("\nCleaning up:")
        self.__scrollback_clean()
        return 0
Example #26
0
    def prepare_registration(self):
        """Prepare data for the profile log registration to the database.

        @param self
        """
        d = dict()
        d["profs"] = self.profs
        d["node_set"] = self.nodeset
        d["nodes"] = len(d["node_set"])
        d["nproc"] = len(self.profs)
        # XML metadata dictionary from main profile log
        lcands = [p for p in self.profs
                  if p.filename.endswith("profile.0.0.0")]
        assert(len(lcands) == 1)
        d["main_loader"] = lcands[0]
        d["soupdic"] = util.soup2dic(d["main_loader"].soup)
        # Place of execution
        d["place"] = util.getplacename(self.options, d)
        # Execution time
        d["start_ts"] = int(d["soupdic"]["Starting Timestamp"])
        t0 = int(d["soupdic"]["Starting Timestamp"])
        t1 = int(d["soupdic"]["Timestamp"])
        d["exec_time"] = (t1 - t0) / 1e6
        # library
        d["library"] = util.NVL(self.options.library, "")
        # Application name
        d["app_viewname"] = util.NVL(self.options.appname, "Unknown")
        # (node, context, thread) => rank mapping
        d["use_rankmap"] = True
        d["rankmap"] = {}
        prof_rank_index = 0
        for p in sorted(self.profs,
                        cmp=lambda x, y:
                            Registerer.triple_comparator(
                util.filename2triple(x.filename),
                util.filename2triple(y.filename))):
            triple = util.filename2triple(p.filename)
            triple_s = ".".join(triple)
            d["rankmap"][triple_s] = prof_rank_index
            prof_rank_index += 1
        if self.options.verbose >= 3:
            util.out("Infodic: ", d)
        ## Dictionary of all information
        self.infodic = d
Example #27
0
    def __load_screen(self):
        homewindow = self.homewindow

        # out ("Homewindow is " +homewindow)

        # check if target Screen is currently in some group and set hostgroup to it

        (hostgroupid, hostgroup) = self.get_group(homewindow)
        rootwindow = self.number()
        if self.exact:
            rootgroup = self.none_group
            hostgroup = self.none_group
        elif self.bNoGroupWrap:
            rootgroup = self.none_group
        else:

            # create a root group and put it into host group

            rootgroup = "RESTORE_" + self.savedir
            rootwindow = self.screen('-t "%s" %s //group' % (rootgroup, 0))
            self.group(False, hostgroup, rootwindow)

        out("restoring Screen session inside window %s (%s)" % (rootwindow, rootgroup))

        self.command_at(True, "setenv SCREENSESSION %s" % os.path.join(self.basedir, self.savedir))

        wins = []
        for id in range(0, int(self.MAXWIN_REAL)):
            try:
                filename = os.path.join(self.basedir, self.savedir, "win_" + str(id))
                if os.path.exists(filename):
                    f = open(filename)
                    win = list(f)[0:9]
                    f.close()
                    win = [x.strip() for x in win]
                    try:
                        nproc = win[8]
                    except:
                        nproc = "0"
                    wins.append(
                        (win[0], win[1], win[2], win[3], self.__escape_bad_chars(win[4]), win[5], win[6], win[7], nproc)
                    )
            except Exception as x:
                sys.stderr.write("%d Unable to load window ( %s )\n" % (id, str(x)))
Example #28
0
def _prune_redundant_ids(all_ids):
    result = all_ids.copy()

    l = [list(x) for x in list(all_ids.values())]
    ll = [x for sublist in l for x in sublist]
    group_ids = Counter(ll)

    ut.out('keys: %d, values: %d...' % (len(all_ids.keys()), len(ll)))

    for i, (key, vals) in enumerate(all_ids.items()):
        if len(vals) > 1:
            redundant_ids = set([v for v in vals if group_ids[v] == 1])

            if len(redundant_ids) > 1:
                redundant_ids.remove(min(redundant_ids))
                for redundant_id in redundant_ids:
                    result[key].remove(redundant_id)

    return result
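A toy input makes the pruning rule concrete: a count of 1 marks an id that appears under only one key, and all but the smallest such id are dropped:

from collections import Counter

all_ids = {'a': {1, 2, 3}, 'b': {3}}
counts = Counter(v for vals in all_ids.values() for v in vals)
for key, vals in all_ids.items():
    redundant = {v for v in vals if counts[v] == 1}
    if len(redundant) > 1:
        redundant.remove(min(redundant))
        vals -= redundant
print(all_ids)  # {'a': {1, 3}, 'b': {3}} -- id 2 dropped, id 1 kept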
Example #29
0
def purge():
    ut.out('purging...')

    domains = [
        'adclicks', 'ifwe', 'twitter', 'youtube', 'soundcloud', 'russia',
        'toxic', 'yelp_hotel', 'yelp_restaurant'
    ]

    folders_to_purge = [
        'independent/data/%s/folds/*', 'independent/output/%s/predictions/*',
        'relational/output/%s/experiments/*',
        'relational/output/%s/predictions/*', 'relational/mrf/*',
        'relational/psl/data/%s/*'
    ]

    for domain in domains:
        for folder in folders_to_purge:
            path = folder % domain if '%s' in folder else folder
            os.system('rm -rf %s' % path)
Example #30
0
    def __save_layouts(self):
        homelayout,homelayoutname=self.get_layout_number()
        layoutname=homelayoutname
        
        if homelayout==-1:
            out("No layouts to save. Create layouts with \":layout new\"")
            return False
        currentlayout=homelayout

        loop_exit_allowed=False
        while currentlayout!=homelayout or not loop_exit_allowed:
            loop_exit_allowed=True
            sys.stdout.write("%s(%s); "%(currentlayout,layoutname))
            self.command_at(False,'eval \'layout dump \"%s\"\' \'dumpscreen layout \"%s\"\' \'layout next\''%(os.path.join(self.basedir,self.savedir,"layout_"+currentlayout+"_"+layoutname),os.path.join(self.basedir,self.savedir,"winlayout_"+currentlayout+"_"+layoutname)))
            currentlayout,layoutname=self.get_layout_number()
        
        linkify(os.path.join(self.basedir,self.savedir),"layout_"+homelayout+"_"+homelayoutname,"last_layout")
        
        return True
Example #31
0
    def __move_all_windows(self, shift, group, kill=False):
        homewindow = int(self.homewindow)

        # create a wrap group for existing windows

        if not self.bNoGroupWrap:
            self.wrap_group_id = self.screen('-t \"%s\" //group' % ('%s_%s' % (group, self.__unique_ident)))
            self.group(False, self.none_group, self.wrap_group_id)

        # move windows by shift and put them in a wrap group
        #for cwin,cgroupid,ctype,ctty in sc.gen_all_windows_fast(self.pid):

        for (
            cwin,
            cgroupid,
            cgroup,
            ctty,
            ctype,
            ctypestr,
            ctitle,
            cfilter,
            cscroll,
            ctime,
            cmdargs,
            ) in sc.gen_all_windows_full(self.pid, sc.require_dumpscreen_window(self.pid,
                    True)):
            iwin = int(cwin)
            if iwin == homewindow:
                homewindow = iwin + shift
                self.homewindow = str(homewindow)

            if not self.bNoGroupWrap and cgroup == self.none_group:
                self.select(self.wrap_group_id)
                self.group(False, group, str(cwin))
            command = '%s -p %s -X number +%d' % (self.sc, cwin, shift)
            if not self.bNoGroupWrap and str(cwin) == str(self.wrap_group_id):
                out('Moving wrap group %s to %d' % (cwin, iwin + shift))
                self.wrap_group_id = str(iwin + shift)
            else:
                out('Moving window %s to %d' % (cwin, iwin + shift))
            os.system(command)
        self.select('%d' % homewindow)
Example #32
0
    def __save_layouts(self):
        (homelayout, homelayoutname) = self.get_layout_number()
        findir = sc.datadir
        if homelayout == -1:
            sys.stderr.write("No layouts to save.\n")
            return False
        path_layout = os.path.join(findir, "load_layout")
        oflayout = open(path_layout, "w")
        ex_lay = []
        for lay in sc.gen_layout_info(self, sc.dumpscreen_layout_info(self)):
            try:
                num = lay[0]
                title = lay[1]
            except:
                title = ""
            if self.excluded_layouts and (num in self.excluded_layouts or title in self.excluded_layouts):
                ex_lay.append(lay)
            else:
                sys.stdout.write("%s(%s); " % (num, title))
                oflayout.write(
                    """layout select %s
layout dump \"%s\"
dumpscreen layout \"%s\"
"""
                    % (num, os.path.join(findir, "layout_" + num), os.path.join(findir, "winlayout_" + num))
                )

        oflayout.write("layout select %s\n" % homelayout)
        oflayout.close()
        self.source(path_layout)
        util.remove(path_layout)
        linkify(findir, "layout_" + homelayout, "last_layout")
        if ex_lay:
            sys.stdout.write(
                """

Excluded layouts: %s"""
                % str(ex_lay)
            )

        out("")
        return True
Example #33
0
def main():
  parser = OptionParser(usage="%prog patchconfig.py")
  #parser.add_option('-s', '--semantics', action='store_true', default=False, dest='semantics', help='test semantics')

  opts, args = parser.parse_args()
    
  if len(args) != 1:
    parser.error("Invalid number of arguments")

  global cfg
  cfg = imp.load_source("cfg", args[0]) 

  out("Identifying differing files")
  exclusions = getattr(cfg, 'EXCLUSIONS', [])
  ignorebin = getattr(cfg, 'IGNORE_BINARY', False)
  diffFiles = getDiffFiles(cfg.ORIGDIR, cfg.DEVDIR, exclusions, ignorebin=ignorebin)

  preparePatchDirectory(cfg.PATCHDIR)
  for oldfl, newfl in diffFiles:
    makePatch(oldfl, newfl, cfg.PATCHDIR, ignorebin=ignorebin)
Example #34
0
    def __move_all_windows(self, shift, group, kill=False):
        homewindow = int(self.homewindow)

        # create a wrap group for existing windows

        if not self.bNoGroupWrap:
            self.wrap_group_id = self.screen('-t "%s" //group' % ("%s_%s" % (group, self.__unique_ident)))
            self.group(False, self.none_group, self.wrap_group_id)

        # move windows by shift and put them in a wrap group
        # for cwin,cgroupid,ctype,ctty in sc.gen_all_windows_fast(self.pid):

        for (
            cwin,
            cgroupid,
            cgroup,
            ctty,
            ctype,
            ctypestr,
            ctitle,
            cfilter,
            cscroll,
            ctime,
            cmdargs,
        ) in sc.gen_all_windows_full(self.pid, sc.require_dumpscreen_window(self.pid, True)):
            iwin = int(cwin)
            if iwin == homewindow:
                homewindow = iwin + shift
                self.homewindow = str(homewindow)

            if not self.bNoGroupWrap and cgroup == self.none_group:
                self.select(self.wrap_group_id)
                self.group(False, group, str(cwin))
            command = "%s -p %s -X number +%d" % (self.sc, cwin, shift)
            if not self.bNoGroupWrap and str(cwin) == str(self.wrap_group_id):
                out("Moving wrap group %s to %d" % (cwin, iwin + shift))
                self.wrap_group_id = str(iwin + shift)
            else:
                out("Moving window %s to %d" % (cwin, iwin + shift))
            os.system(command)
        self.select("%d" % homewindow)
Example #35
0
    def __save_layouts(self):
        (homelayout, homelayoutname) = self.get_layout_number()
        findir = sc.datadir
        if homelayout == -1:
            sys.stderr.write("No layouts to save.\n")
            return False
        path_layout = os.path.join(findir, "load_layout")
        oflayout = open(path_layout, "w")
        ex_lay = []
        for lay in sc.gen_layout_info(self, sc.dumpscreen_layout_info(self)):
            try:
                num = lay[0]
                title = lay[1]
            except:
                title = ""
            if self.excluded_layouts and (num in self.excluded_layouts or
                    title in self.excluded_layouts):
                ex_lay.append(lay)
            else:
                sys.stdout.write("%s(%s); " % (num, title))
                oflayout.write('''layout select %s
layout dump \"%s\"
dumpscreen layout \"%s\"
''' %
                               (num, os.path.join(findir, "layout_" +
                               num), os.path.join(findir, "winlayout_" +
                               num)))

        oflayout.write('layout select %s\n' % homelayout)
        oflayout.close()
        self.source(path_layout)
        util.remove(path_layout)
        linkify(findir, "layout_" + homelayout, "last_layout")
        if ex_lay:
            sys.stdout.write("""

Excluded layouts: %s""" % str(ex_lay))

        out("")
        return True
Example #36
0
 def __restore_mru(self):
     try:
         mru=open(os.path.join(self.basedir,self.savedir,"mru"),'r').read().strip().split(' ')
         mru.reverse()
         for win in mru:
             self.select("%s"%self.__wins_trans[win])
     except:
         out('Unable to restore MRU!')
         pass
     if self.restore_previous:
         self.select(self.homewindow)
     elif os.path.exists(os.path.join(self.basedir,self.savedir,"last_win")):
         # select last selected window
         last=os.readlink(os.path.join(self.basedir,self.savedir,"last_win"))
         (lasthead,lasttail)=os.path.split(last)
         lastid=lasttail.split("_",1)[1]
         try:
             self.select(self.__wins_trans[lastid])
         except:
             self.select('-')
     else:
         self.select('-')
Example #37
0
def print_cell(content, width, leftmargin=1):
    out(' ' * leftmargin)
    string = str(content)
    pad = (width - len(string)) * ' '
    if util.isnumber(content):
        out(pad + string)
    else:
        out(string + pad)
Example #38
0
def print_cell(content, width, leftmargin=1):
    out(' ' * leftmargin)
    string = str(content)
    pad = (width - len(string)) * ' '
    if util.isnumber(content):
        out(pad + string)
    else:
        out(string + pad)
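A self-contained sketch of the same padding logic, with out() and util.isnumber() replaced by stdlib stand-ins:

import sys

out = sys.stdout.write

def print_cell_sketch(content, width, leftmargin=1):
    out(' ' * leftmargin)
    string = str(content)
    pad = (width - len(string)) * ' '
    if isinstance(content, (int, float)):  # stand-in for util.isnumber
        out(pad + string)   # right-align numbers
    else:
        out(string + pad)   # left-align text

print_cell_sketch('name', 8); print_cell_sketch(42, 6); out('\n')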
Example #39
0
    def __restore_mru(self):
        if self.enable_layout and not self.mru:
            pass
        else:
            try:
                if self.mru:
                    sys.stdout.write("\nRestoring MRU windows order:")
                else:
                    sys.stdout.write("\nSelecting last window:")

                mru_w = []
                ifmru = open(os.path.join(self.basedir, self.savedir,
                             "mru"), 'r')
                for line in ifmru:
                    n = line.strip()
                    try:
                        nw = (self.__wins_trans)[n]
                        mru_w.append('select ' + nw + '\n')
                        sys.stdout.write(' %s' % nw)
                        if not self.mru:
                            break
                    except:
                        if self.enable_layout:
                            mru_w.append('select -\n')
                        else:
                            pass
                ifmru.close()
                mru_w.reverse()
                path_mru_tmp = os.path.join(self.basedir, self.savedir,
                        "mru_tmp")
                ofmru = open(path_mru_tmp, "w")
                ofmru.writelines(mru_w)
                ofmru.close()
                self.source(path_mru_tmp)
                util.remove(path_mru_tmp)
            except:
                sys.stderr.write(' Failed to load MRU.')
            out("")
Example #40
0
    def __setup_savedir(self,basedir,savedir):
        out ("Setting up session directory %s" % savedir)
        if not os.path.exists(basedir):
            os.makedirs(basedir)
            f=open(os.path.join(basedir,self.blacklistfile),'w')
            f.close()

        if os.path.exists(os.path.join(basedir,savedir)):
            out("Directory \"%s\" in \"%s\" already exists. Use --force to overwrite." % (savedir, basedir))
            if self.force:
                out('forcing..')
                out('cleaning up \"%s\"' % savedir)
                for pattern in ('win_*', 'hardcopy.*', 'layout_*', 'winlayout_*'):
                    for fl in glob.glob(os.path.join(basedir, savedir, pattern)):
                        os.remove(fl)
                return True
            else:
                out('Aborting.')
                return False
        else:
            os.makedirs(os.path.join(basedir,savedir))
            return True
Example #41
0
    def _add_profgroup(self):
        """Safely insert profgroup.
        If a record with the same profgroup condition exists,
        it only returns the id column of that,
        otherwise it inserts a new record and returns the id.

        @param self
        @return id of profgroup to insert into profexec table
        """
        sql_s = """SELECT id
                   FROM profgroup
                   WHERE
                       application = ?
                   AND nodes = ?
                   AND procs = ?
                   AND place = ?
                   AND library = ?;
                   """
        pd = self.infodic
        print(pd)
        rtup = self.conn.select(sql_s,
                                (pd["soupdic"]["Executable"].encode("utf_8"),
                                 pd["nodes"],
                                 pd["nproc"],
                                 pd["place"].encode("utf_8"),
                                 pd["library"].encode("utf_8"),))
        if len(rtup) == 0:
            if self.options.verbose >= 1:
                util.out("No such profgroup. will newly insert...")
            pginsert = {
                "application": pd["soupdic"]["Executable"].encode("utf_8"),
                "app_viewname": pd["app_viewname"],
                "nodes": pd["nodes"],
                "procs": pd["nproc"],
                "place": pd["place"].encode("utf_8"),
                "library": pd["library"].encode("utf_8")}
            if self.options.verbose >= 3:
                util.out("new profgroup dict", pginsert)
            rdic = self.conn.insert("profgroup", pginsert)
            rt = rdic["id"]
            if self.options.verbose >= 1:
                util.out("New profgroup %d: %s" % (rt, pginsert))
        else:
            if self.options.verbose >= 1:
                util.out("Using existing profgroup ...")
            rt = rtup[0][0]
        return rt
Example #42
0
    def main(self):
        """Main function.

        @param self
        @todo allow specifying library and similar options
        @todo allow specifying the DB file when using SQLite3
        @todo make -t run a test
        @todo add class Parp etc.
        """
        self.parse_opt()
        # Data Prepare
        logdir = self.args[0]
        funcmapfile = self.args[1]
        self.load_profs(logdir, funcmapfile)
        ## Unique nodes list
        self.nodeset = util.node_set(self.profs)
        # Prepare information to add
        self.prepare_registration()
        # DB prepare
        #self.conn = db.init("postgres", username="******", hostname="127.0.0.1")
        self.conn = db.init("sqlite3", dbfile="/home/kabe/Archives/prof.db")
        ### BEGIN TRANSACTION ###
        self.conn.begin_transaction()
        # Register
        try:
            # Profgroup
            group_id = self.add_profgroup()
            # ProfExec Insert
            profexec_id = self.add_profexec(group_id)
            util.out(group_id, profexec_id)
            # Profile Insert
            self.insert_profile(profexec_id)
        except Exception as e:
            util.err("Exception in main", repr(e))
            self.conn.rollback_transaction()
            raise  # Re-raise the exception
Example #43
0
    def __restore_mru(self):
        if self.enable_layout and not self.mru:
            pass
        else:
            try:
                if self.mru:
                    sys.stdout.write("\nRestoring MRU windows order:")
                else:
                    sys.stdout.write("\nSelecting last window:")

                mru_w = []
                ifmru = open(os.path.join(self.basedir, self.savedir, "mru"), "r")
                for line in ifmru:
                    n = line.strip()
                    try:
                        nw = (self.__wins_trans)[n]
                        mru_w.append("select " + nw + "\n")
                        sys.stdout.write(" %s" % nw)
                        if not self.mru:
                            break
                    except:
                        if self.enable_layout:
                            mru_w.append("select -\n")
                        else:
                            pass
                ifmru.close()
                mru_w.reverse()
                path_mru_tmp = os.path.join(self.basedir, self.savedir, "mru_tmp")
                ofmru = open(path_mru_tmp, "w")
                ofmru.writelines(mru_w)
                ofmru.close()
                self.source(path_mru_tmp)
                util.remove(path_mru_tmp)
            except:
                sys.stderr.write(" Failed to load MRU.")
            out("")
Example #44
0
def knn_similarities(df, sim_thresh=0.8, n_neighbors=100,
                     approx_datapoints=120000, max_feats=None,
                     in_col='text', out_col='text_id', out_dir='',
                     fname='sim.csv'):
    ut.makedirs(out_dir)

    ut.out('splitting data into manageable chunks...')
    dfs = _split_data(df, approx_datapoints=approx_datapoints, in_col=in_col)
    all_ids = defaultdict(set)
    group_id = 0

    for n, chunk_df in enumerate(dfs):
        ut.out('creating tf-idf matrix for chunk %d...' % n)
        groups = defaultdict(lambda: set())
        g_df = chunk_df.groupby(in_col).size().reset_index()
        strings = list(g_df[in_col])
        tf_idf_matrix = _tf_idf(strings, analyzer=_ngrams, max_feats=max_feats)
        nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(tf_idf_matrix)
        ut.out(str(tf_idf_matrix.shape))

        ut.out('querying/filtering each object for its closest neighbors...')
        for row in range(len(strings)):

            # if row % 100 == 0:
            #     ut.out('%d' % row)

            distances, indexes = nbrs.kneighbors(tf_idf_matrix.getrow(row))
            nbs = list(zip(distances[0], indexes[0]))
            nbs = [(d, i) for d, i in nbs if d <= sim_thresh]

            # ut.out('\n%s' % strings[row])
            # for d, i in nbs[:5]:
            #     ut.out('[%d] %s: %f' % (i, strings[i], d))

            groups[group_id].update(set([i for d, i in nbs]))
            group_id += 1

        groups = _merge_identical_groups(groups)
        ids = _assign_ids_to_items(groups, strings)
        all_ids = _aggregate_identical_keys(all_ids, ids)

    all_ids = _prune_single_items(all_ids, df, in_col)
    all_ids = _prune_redundant_ids(all_ids)
    sim_df = _ids_to_dataframe(all_ids, df, in_col=in_col, out_col=out_col)
    sim_df.to_csv(out_dir + fname, index=None)
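The core tf-idf/nearest-neighbor step, sketched with stock scikit-learn parts; the char_wb analyzer is only a stand-in for the project's _ngrams helper:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

strings = ['free money now', 'free money noww', 'hello there']
m = TfidfVectorizer(analyzer='char_wb', ngram_range=(3, 3)).fit_transform(strings)
nbrs = NearestNeighbors(n_neighbors=2).fit(m)
dist, idx = nbrs.kneighbors(m.getrow(0))
print(list(zip(dist[0], idx[0])))  # nearest neighbors of strings[0]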
Example #45
0
 def load(self):
     if 'all' in self.force_start:
         self.primer_arg+='S'
         self.force_start=[]
     if 'all' in self.scroll:
         self._scrollfile=None
     out('session "%s" loading "%s"' % (self.pid,os.path.join(self.basedir,self.savedir)))
     #check if the saved session exists and get the biggest saved window number and a number of saved windows
     maxnewwindow=0
     newwindows=0
     try:
         winlist=list(glob.glob(os.path.join(self.basedir,self.savedir,'win_*')))
         newwindows=len(winlist)
         out('%d new windows'%newwindows)
     except Exception as e:
         out('Unable to open.')
         out(str(e))
         return 1
Example #46
0
    def save(self):
        (self.homewindow, title) = self.get_number_and_title()
        out("\nCreating directories:")
        if not self.__setup_savedir(self.basedir, self.savedir):
            return 1
        sc.require_dumpscreen_window(self.pid, True)

        if self.enable_layout:
            out("\nSaving layouts:")
            self.__save_layouts()

        out("\nSaving windows:")
        self.__save_screen()

        out("\nCleaning up scrollbacks.")
        self.__scrollback_clean()
        if self.__vim_files:
            self.__wait_vim()
        return 0
Example #47
0
    def save(self):
        (self.homewindow, title) = self.get_number_and_title()
        out("\nCreating directories:")
        if not self.__setup_savedir(self.basedir, self.savedir):
            return 1
        sc.require_dumpscreen_window(self.pid, True)

        if self.enable_layout:
            out("\nSaving layouts:")
            self.__save_layouts()

        out("\nSaving windows:")
        self.__save_screen()

        out("\nCleaning up scrollbacks.")
        self.__scrollback_clean()
        if self.__vim_files:
            self.__wait_vim()
        return 0
Example #48
0
def _significance(df, pred, samples=20):
    ref_auprs, pred_auprs = [], []
    ref_aurocs, pred_aurocs = [], []
    lc, rc = 'label', 'ref_pred'

    t1 = ut.out('computing aupr and auroc significance levels...')

    for i in range(samples):
        s_df = df.sample(frac=0.5, replace=True)
        ref_auprs.append(average_precision_score(s_df[lc], s_df[rc]))
        ref_aurocs.append(roc_auc_score(s_df[lc], s_df[rc]))
        pred_auprs.append(average_precision_score(s_df[lc], s_df[pred]))
        pred_aurocs.append(roc_auc_score(s_df[lc], s_df[pred]))

    auprs = np.subtract(ref_auprs, pred_auprs)
    aurocs = np.subtract(ref_aurocs, pred_aurocs)
    zeros = np.zeros(len(auprs))
    # don't reuse t1 here -- it still holds the timer started by ut.out()
    stat1, aupr_pval = ttest_rel(auprs, zeros)
    stat2, auroc_pval = ttest_rel(aurocs, zeros)
    ut.time(t1)

    return aupr_pval, auroc_pval
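A toy version of the significance test: bootstrap two metric samples and run a paired t-test of their differences against zero (synthetic numbers, not project data):

import numpy as np
from scipy.stats import ttest_rel

rng = np.random.default_rng(0)
ref = rng.normal(0.80, 0.02, size=20)   # reference AUPRs per resample
pred = rng.normal(0.78, 0.02, size=20)  # predicted AUPRs per resample
stat, pval = ttest_rel(ref - pred, np.zeros(20))
print('p-value: %.4f' % pval)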
Example #49
0
def _approximations(df, relations=[]):
    t1 = ut.out('approximating relational with mean, max, median...')
    df = df.copy()

    con_obj = Connections()

    g, sgs = con_obj.find_subgraphs(df, relations, verbose=False)
    approx_dict = {}

    sg_list = []
    for i, sg in enumerate(sgs):
        if sg[3] > 0:  # num edges > 0
            sg_list.extend([(x, i) for x in sg[0]])  # give sg_id

    if len(sg_list) == 0:
        return approx_dict

    sg_df = pd.DataFrame(sg_list, columns=['com_id', 'sg_id'])
    df = df.merge(sg_df, how='left')
    df['sg_id'] = df['sg_id'].fillna(-1).apply(int)

    sg_mean = df.groupby('sg_id')['ind_pred'].mean().reset_index()\
        .rename(columns={'ind_pred': 'sg_mean_pred'})
    sg_median = df.groupby('sg_id')['ind_pred'].median().reset_index()\
        .rename(columns={'ind_pred': 'sg_median_pred'})
    sg_max = df.groupby('sg_id')['ind_pred'].max().reset_index()\
        .rename(columns={'ind_pred': 'sg_max_pred'})
    df = df.merge(sg_mean).merge(sg_median).merge(sg_max)

    filler = lambda x, c: x['ind_pred'] if x['sg_id'] == -1 else x[c]
    for col in ['sg_mean_pred', 'sg_median_pred', 'sg_max_pred']:
        cols = ['ind_pred', col, 'sg_id']
        df[col] = df[cols].apply(filler, axis=1, args=(col,))

    ut.time(t1)

    return df
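The groupby/merge pattern above, reduced to a toy frame; sg_id == -1 marks rows outside any subgraph, which fall back to their own prediction:

import pandas as pd

df = pd.DataFrame({'com_id': [1, 2, 3, 4],
                   'sg_id': [0, 0, 1, -1],
                   'ind_pred': [0.9, 0.5, 0.2, 0.7]})
sg_mean = (df.groupby('sg_id')['ind_pred'].mean().reset_index()
             .rename(columns={'ind_pred': 'sg_mean_pred'}))
df = df.merge(sg_mean)
df.loc[df['sg_id'] == -1, 'sg_mean_pred'] = df['ind_pred']
print(df)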
Example #50
0
def run_interpreter_tests(debug=False):
  tot = 0
  tot_ok = 0
  for flname in os.listdir(INTERPRETER_TEST_DIR):
    if os.path.splitext(flname)[1] != ".js": continue

    tot += 1
    flpath = os.path.join(INTERPRETER_TEST_DIR, flname)

    out(flname)

    outp = evaluate_file(flpath, debug)
    exppath = get_exp_path(flpath, '.exp')
    stat = validate_value(flpath, outp)

    if stat == 'match':
      tot_ok += 1
    out('%s %s\n' % (exppath, stat))

  out('%d of %d interpreter tests successful\n' % (tot_ok, tot))
Example #51
0
def _split_data(df, approx_datapoints=120000, in_col='text'):
    delta = 100000000

    if len(df.groupby(in_col).size()) <= approx_datapoints:
        ut.out('found optimal num pieces: 1')
        return [df]

    for i in range(2, 1000):
        dps = []
        pieces = np.array_split(df, i)

        for piece in pieces:
            dps.append(len(piece.groupby(in_col).size()))

        mean_dps = np.mean(dps)
        ut.out('num pieces: %d, mean datapoints: %.2f' % (i, mean_dps))

        new_delta = np.abs(approx_datapoints - mean_dps)
        if new_delta < delta:
            delta = new_delta
        else:
            ut.out('found optimal num pieces: %d' % (i - 1))
            pieces = np.array_split(df, i - 1)
            return pieces
Example #52
0
def _analyze(df, col, samples=100, relations=[]):
    gids = [r[2] for r in relations]

    if len(relations) == 0:
        return {}

    t1 = ut.out('computing messages missed most often...')

    p, r, ts = precision_recall_curve(df['label'], df[col])
    aupr = average_precision_score(df['label'], df[col])
    mp = 1.0 - aupr

    corrects = []
    step = int(len(ts) / 100) if len(ts) > 100 else 1
    for i in range(0, len(ts), step):
        t = ts[i]
        df['pred'] = np.where(df[col] > t, 1, 0)
        correct = df['pred'] == df['label']
        corrects.append(correct.apply(int))

    total_corrects = [sum(x) for x in zip(*corrects)]
    df['correct'] = total_corrects

    # extract bottom x% data
    df = df.sort_values('correct', ascending=False)
    ndx = len(df) - int(len(df) * mp)
    qf1, qf2 = df[ndx:], df[:ndx]
    # dfs = df[df['label'] == 1]
    qf1s = qf1[qf1['label'] == 1]  # low performers
    qf1o = qf1[qf1['label'] == 0]  # low performers
    qf2s = qf2[qf2['label'] == 1]  # high performers
    qf2o = qf2[qf2['label'] == 0]  # high performers
    ut.time(t1)

    # ut.out('spam in bot %.2f%%: %d' % (mp * 100, len(qf1s)))
    # ut.out('ham in bot %.2f%%: %d' % (mp * 100, len(qf1o)))

    t1 = ut.out('computing messages with a relation...')
    r1s, r1sf = _msgs_with_rel(qf1s, gids, mp, 'bot', 'spam')
    r1o, r1of = _msgs_with_rel(qf1o, gids, mp, 'bot', 'ham')
    r2s, r2sf = _msgs_with_rel(qf2s, gids, mp, 'top', 'spam')
    r2o, r2of = _msgs_with_rel(qf2o, gids, mp, 'top', 'ham')
    ut.time(t1)

    # ut.out()

    t1 = ut.out('computing messages with an outside relation...')
    rr1sof = _rm_in_sect(df, qf1s, qf2, gids, mp, r1s, 'bot', 'spam')
    rr1oof = _rm_in_sect(df, qf1o, qf2, gids, mp, r1o, 'bot', 'ham')
    rr2sof = _rm_in_sect(df, qf2s, qf1, gids, mp, r2s, 'top', 'spam')
    rr2oof = _rm_in_sect(df, qf2o, qf1, gids, mp, r2o, 'top', 'ham')
    # rr1sif = self._rm_in_sect(df, qf1s, qf1, gids, mp, r1s, 'bot', 'spam',
    #                           'inside')
    # rr1oif = self._rm_in_sect(df, qf1o, qf1, gids, mp, r1o, 'bot', 'ham',
    #                           'inside')

    sd = {}
    sd['bot_spam_rels'] = round(r1sf, 4)
    sd['bot_ham_rels'] = round(r1of, 4)
    sd['top_spam_rels'] = round(r2sf, 4)
    sd['top_ham_rels'] = round(r2of, 4)
    sd['bot_spam_rels_out'] = round(rr1sof, 4)
    sd['bot_ham_rels_out'] = round(rr1oof, 4)
    sd['top_spam_rels_out'] = round(rr2sof, 4)
    sd['top_ham_rels_out'] = round(rr2oof, 4)
    # sd['bot_spam_rels_in'] = rr1sif
    # sd['bot_ham_rels_in'] = rr1oif

    ut.time(t1)
    return sd
Example #53
0
def cosine_similarities(df, sim_thresh=0.8, in_col='text',
                        out_col='text_id', approx_datapoints=120000,
                        max_feats=None, k=5, max_id=0, out_dir='',
                        fname='sim.csv'):
    ut.makedirs(out_dir)

    group_id = max_id
    all_ids = defaultdict(set)
    dfs = _split_data(df, approx_datapoints=approx_datapoints, in_col=in_col)

    for n, chunk_df in enumerate(dfs):
        t1 = time.time()

        ut.out('\ncreating tf-idf matrix for chunk %d...' % (n + 1))
        groups = defaultdict(set)
        g_df = chunk_df.groupby(in_col).size().reset_index()
        strings = list(g_df[in_col])
        m = _tf_idf(strings, analyzer=_ngrams, max_feats=max_feats)

        v, total = len(m.data), m.shape[0] * m.shape[1]
        ut.out('sparsity: (%d/%d) %.2f%%' % (v, total, 100 * (v / total)))

        ut.out('computing cosine similarities...')
        cos_sim = cosine_similarity(m, dense_output=False)

        ut.out('filtering out similarities below threshold...')
        scm = cos_sim >= sim_thresh

        ut.out('putting matches into groups...')
        for ndx in range(len(strings)):
            data = cos_sim[ndx].data
            indices = list(cos_sim[ndx].indices)
            sims = [(x, data[indices.index(x)]) for x in scm[ndx].indices]
            sims = sorted(sims, key=lambda x: x[1], reverse=True)
            sim_ids = [sim_ndx for sim_ndx, sim_val in sims[:k]]
            groups[group_id].update(set(sim_ids))
            group_id += 1

        ut.out('merging identical groups...')
        groups = _merge_identical_groups(groups)

        ut.out('assigning ids to items...')
        ids = _assign_ids_to_items(groups, strings)

        ut.out('aggregating identical keys...')
        all_ids = _aggregate_identical_keys(all_ids, ids)

        ut.out('chunk time: %.4fm' % ((time.time() - t1) / 60.0))

    t1 = time.time()
    ut.out('\nprune single items...')
    all_ids = _prune_single_items(all_ids, df, in_col)
    ut.time(t1)

    t1 = time.time()
    ut.out('prune redundant ids...')
    all_ids = _prune_redundant_ids(all_ids)
    ut.time(t1)

    t1 = time.time()
    ut.out('putting ids into a dataframe...')
    sim_df = _ids_to_dataframe(all_ids, df, in_col=in_col, out_col=out_col)
    ut.out('writing to csv...', 0)
    sim_df.to_csv(out_dir + fname, index=None)
    ut.time(t1)
    ut.out()
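
For reference, the pairing core of cosine_similarities in a few lines: tf-idf
vectors, a sparse similarity matrix, and the same thresholded boolean mask.
sklearn's built-in character n-gram analyzer stands in here for the _ngrams
helper, which is not shown.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

strings = ['cheap pills, buy now!!!', 'cheap pills, buy now!!', 'nice photo']
m = TfidfVectorizer(analyzer='char_wb',
                    ngram_range=(3, 3)).fit_transform(strings)
cos_sim = cosine_similarity(m, dense_output=False)
scm = cos_sim >= 0.8  # sparse boolean mask, as in the code above

for ndx in range(len(strings)):
    matches = [strings[j] for j in scm[ndx].indices if j != ndx]
    print(strings[ndx], '->', matches)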
Example #54
0
                        self.vote(driver, username, vote_url)
                        break
                    except UnexpectedAlertPresentException:
                        # Captcha Error
                        out(f"Retrying to vote for {username}")
                        continue


if __name__ == "__main__":
    bot = Votebot()

    usernames = get_lines(
        bot.conf["username_file"])  # Users to get the voting reward for
    vote_urls = get_lines(
        bot.conf["vote_url_file"]
    )  # URL to the vote page of a server on minecraft-server.eu

    bot.run(usernames, vote_urls)

    if bot.conf["use_timer"] == "True":
        while True:
            # calculate a randomized time for the next execution
            time_till_next_day = datetime.combine(
                datetime.now().date() + timedelta(days=1),
                datetime.strptime("0000", "%H%M").time()) - datetime.now()

            delay = time_till_next_day + timedelta(hours=random.randint(2, 23))
            out(f"Next execution in: {delay}")
            time.sleep(delay.total_seconds())
            bot.run(usernames, vote_urls)
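
A quick check of the timer arithmetic above, runnable on its own: the delay is
the time until next midnight plus a random 2-23 hours, so it frequently spans
more than one day. timedelta.seconds silently drops whole days, which is why
the sleep above has to use total_seconds().

from datetime import datetime, timedelta
import random

now = datetime.now()
next_midnight = datetime.combine(now.date() + timedelta(days=1),
                                 datetime.strptime("0000", "%H%M").time())
delay = (next_midnight - now) + timedelta(hours=random.randint(2, 23))
print(delay.seconds, delay.total_seconds())  # .seconds may be a day short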
Example #55
0
                        sys.stdout.write("(shell)")
                        extra_data_name = self.__save_shellvars(cwin, args[0].split("/")[-1])

                    cpids_data[i] = (
                        cpids_data[i][0],
                        cpids_data[i][1],
                        cpids_data[i][2],
                        cpids_data[i][3],
                        extra_data_name,
                    )

            scrollback_filename = os.path.join(findir, "hardcopy." + cwin)
            sys.stdout.write("%s %s; " % (cwin, ctypestr))
            errors += self.__save_win(cwin, ctypestr, cpids_data, ctime, rollback)
            rollback = (None, None, None)
        out("")

        # remove ignored scrollbacks

        if "all" in self.scroll:
            for f in glob.glob(os.path.join(findir, "hardcopy.*")):
                open(f, "w")
        elif self.scroll:
            import tools

            (scroll_groups, scroll_wins) = tools.subwindows(self.pid, sc.datadir, self.scroll)
            out("Scrollback excluded groups: %s" % str(scroll_groups))
            out("All scrollback excluded windows: %s" % str(scroll_wins))
            for w in scroll_wins:
                util.remove(os.path.join(findir, "hardcopy.%s" % w))
Example #56
0
    def __load_layouts(self):
        cdinfo = list(map(int, self.dinfo()[0:2]))
        out("Terminal size: %s %s" % (cdinfo[0], cdinfo[1]))
        homewindow = self.homewindow
        (homelayout, homelayoutname) = self.get_layout_number()
        layout_trans = {}
        layout_c = len(glob.glob(os.path.join(self.basedir, self.savedir, "winlayout_*")))
        if layout_c > 0:
            self.__layouts_loaded = True
        lc = 0
        layout_file = sc.layout_begin(self.pid)
        while lc < layout_c:
            filename = None
            try:
                filename = glob.glob(os.path.join(self.basedir, self.savedir, "layout_%d" % lc))[0]
                layoutnumber = filename.rsplit("_", 1)[1]
                (head, tail) = os.path.split(filename)

                # the winlayout_NUM files contain "dumpscreen layout" output
                # (see GNUScreen.Regions class)

                filename2 = os.path.join(head, "win" + tail)
                regions = sc.get_regions(filename2)
                status = self.get_layout_new(regions.title)
                if not status:
                    sys.stderr.write(
                        "\nMaximum number of layouts reached. Ignoring layout %s (%s).\n"
                        % (layoutnumber, regions.title)
                    )
                    break
                else:
                    if self.exact:
                        self.layout("number %s" % layoutnumber, False)
                        currentlayout = layoutnumber
                    else:
                        currentlayout = self.get_layout_number()[0]
                    layout_trans[layoutnumber] = currentlayout

                    sc.layout_select_layout(currentlayout)
                    # source the output produced by "layout dump"
                    sc.layout_load_dump(open(filename, "r"))

                    regions_size = []
                    winlist = []

                    for (window, sizex, sizey) in regions.regions:
                        winlist.append(window)
                        regions_size.append((sizex, sizey))
                    sc.layout_load_regions(regions, self.__wins_trans, cdinfo[0], cdinfo[1])
                    # sys.stdout.write(" %s (%s);" % (layoutnumber, regions.title))
            except Exception:
                # import traceback
                # traceback.print_exc(file=sys.stderr)
                # raise
                layout_c += 1
                if layout_c > 2000:
                    sys.stderr.write("\nErrors during layouts loading.\n")
                    break
            lc += 1
        out("")
        if lc != 0:

            # select last layout

            lastname = None
            lastid_l = None

            if homelayout != -1:
                out("Returning homelayout %s" % homelayout)
                layout_file.write("layout select %s" % homelayout)
            else:
                sys.stderr.write("No homelayout - unable to return.\n")

            if os.path.exists(os.path.join(self.basedir, self.savedir, "last_layout")) and len(layout_trans) > 0:
                last = os.readlink(os.path.join(self.basedir, self.savedir, "last_layout"))
                (lasthead, lasttail) = os.path.split(last)
                last = lasttail.split("_", 2)
                lastid_l = last[1]
                try:
                    out("Selecting last layout: %s (%s)" % (layout_trans[lastid_l], lastid_l))
                    layout_file.write("layout select %s" % layout_trans[lastid_l])
                    # ^^ layout numbering may change, use layout_trans={}
                except Exception:
                    sys.stderr.write("Unable to select last layout %s\n" % lastid_l)
        else:
            self.enable_layout = False
        sc.layout_end()
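
One detail worth isolating from __load_layouts: saved layout numbers need not
survive a reload, so every selection has to go through the layout_trans map
built while loading (toy values for illustration):

layout_trans = {"0": "3", "1": "4"}  # saved number -> number assigned on load
last_saved = "1"
print("layout select %s" % layout_trans[last_saved])  # -> layout select 4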
Example #57
0
                        help='chunk size to use: %(default)s')
    parser.add_argument('-c', '--chunk', default=-1, type=int,
                        help='chunk to use: %(default)s')
    args = parser.parse_args()

    domain = args.domain
    info_type = args.info_type
    approx_datapoints = args.approx_datapoints
    sim_thresh = args.sim_thresh
    max_feats = int(args.max_feats) if args.max_feats is not None else None
    k = args.topk
    chunk_size = args.chunk_size
    chunk = args.chunk

    t = (domain, info_type, approx_datapoints, sim_thresh, k)
    ut.out('d: %s, i: %s, a: %d, s: %.2f, k: %d' % t)
    if max_feats is not None:
        ut.out(', m: %d' % max_feats, 0)

    in_dir = 'independent/data/' + domain + '/extractions/'
    out_dir = 'independent/data/' + domain + '/similarities/'
    df = pd.read_csv(in_dir + info_type + '.csv')

    fname = str(chunk) + '_' + info_type + '_sim.csv'

    df = retrieve_chunk(df, chunk_number=chunk, max_size=chunk_size)
    max_id = retrieve_max_id(out_dir, chunk_number=chunk, info_type=info_type)
    cosine_similarities(df, in_col=info_type, out_col=info_type + '_id',
                        out_dir=out_dir, fname=fname,
                        max_feats=max_feats, k=k,
                        approx_datapoints=approx_datapoints,
                        sim_thresh=sim_thresh)
Example #58
0
    def vote(self, driver, username, vote_url):
        # TODO set viewport depending on whether a mobile or desktop useragent is used
        self.set_viewport_size(driver, 1920, 1080)
        driver.get(vote_url)

        # time.sleep(5)  # Wait for the page to properly load

        try:
            # Accept TOS
            tos_box = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, 'qc-cmp-ui-content')))
            actions = ActionChains(driver)
            actions.move_to_element(tos_box).perform()
            tos_box.click()
            submit_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "/html/body/div[1]/div/div/div[2]/button[2]")))
            submit_button.click()

        except TimeoutException:
            pass  # No TOS popup

        time.sleep(2)

        try:
            # We use .find_element_by_id here because we know the id
            text_input = driver.find_element_by_id("playername")

            time.sleep(10)

            text_input.click()

            # Then we'll fake typing into it
            text_input.send_keys(username)

            time.sleep(2)
        except NoSuchElementException:
            pass  # Users cannot receive rewards for voting

        # Now we can grab the submit button and click it
        submit_button = driver.find_element_by_id("captcha")
        submit_button.click()

        time.sleep(4)

        try:
            # Try to solve a captcha with the browser extension Buster
            driver.switch_to.frame(
                driver.find_element_by_xpath(
                    '//*[@title="recaptcha challenge"]'))
            time.sleep(3)
            buster_button = driver.find_element_by_xpath(
                '//*[@id="solver-button"]')
            buster_button.click()
        except NoSuchElementException:
            pass  # No captcha

        # TODO Optimize the url check
        i = 0
        while True:
            if "success" in driver.current_url or "fail" in driver.current_url:
                current_url = driver.current_url
                break
            elif i == 5:
                out(f"Captcha failed for {username}")
                raise UnexpectedAlertPresentException
            i += 1
            time.sleep(1)

        if "success" in current_url:
            out(f"Voted successfully for {username}")
        elif "fail" in current_url:
            out(f"Couldn't vote for {username}")

        driver.close()
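
The URL polling loop above could also lean on Selenium's own waits; a sketch
under the assumption of Selenium 4 (EC.url_contains and EC.any_of are real
expected_conditions, though any_of requires Selenium 4; wait_for_result is a
hypothetical helper, and out() is the logger used above):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (TimeoutException,
                                        UnexpectedAlertPresentException)

def wait_for_result(driver, username, timeout=6):
    """Block until the vote page redirects to a success or fail URL."""
    try:
        WebDriverWait(driver, timeout).until(
            EC.any_of(EC.url_contains("success"), EC.url_contains("fail")))
    except TimeoutException:
        out(f"Captcha failed for {username}")
        raise UnexpectedAlertPresentException
    return driver.current_url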
Example #59
0
def _spread(df, col='ind_pred', relations=[]):
    """This'll give some post-hoc test-set analysis, when running this,
    keep track of the test sets that improved using relational modeling,
    then average those test set statistics together to compare to the test
    sets that did not improve."""
    t1 = ut.out('computing subgraph statistics...')
    con_obj = Connections()

    gids = [r[2] for r in relations]
    g, sgs = con_obj.find_subgraphs(df, relations, verbose=False)
    spread_dict = {}

    sg_list = []
    for i, sg in enumerate(sgs):
        if sg[3] > 0:  # num edges > 0
            sg_list.extend([(x, i) for x in sg[0]])  # give sg_id

    if len(sg_list) == 0:
        return spread_dict

    sg_df = pd.DataFrame(sg_list, columns=['com_id', 'sg_id'])
    df = df.merge(sg_df, how='left')
    df['sg_id'] = df['sg_id'].fillna(-1).apply(int)

    p, r, ts = precision_recall_curve(df['label'], df[col])
    aupr = average_precision_score(df['label'], df[col])
    mp = 1.0 - aupr

    corrects = []
    step = int(len(ts) / 100) if len(ts) > 100 else 1
    for i in range(0, len(ts), step):
        t = ts[i]
        df['pred'] = np.where(df[col] > t, 1, 0)
        correct = df['pred'] == df['label']
        corrects.append(correct.apply(int))

    total_corrects = [sum(x) for x in zip(*corrects)]
    df['correct'] = total_corrects

    # extract bottom x% data
    df = df.sort_values('correct', ascending=False)
    ndx = len(df) - int(len(df) * mp)
    qfs = df[df['label'] == 1]
    qfo = df[df['label'] == 0]
    qf1, qf2 = df[ndx:], df[:ndx]
    qf1s = qf1[qf1['label'] == 1]  # low performers
    qf1o = qf1[qf1['label'] == 0]  # low performers
    qf2s = qf2[qf2['label'] == 1]  # high performers
    qf2o = qf2[qf2['label'] == 0]  # high performers

    spread_dict['spam_mean'] = round(qfs['ind_pred'].mean(), 4)
    spread_dict['spam_median'] = round(qfs['ind_pred'].median(), 4)
    spread_dict['ham_mean'] = round(qfo['ind_pred'].mean(), 4)
    spread_dict['ham_median'] = round(qfo['ind_pred'].median(), 4)

    for nm, temp_df in [('bot_spam', qf1s), ('bot_ham', qf1o),
                        ('top_spam', qf2s), ('top_ham', qf2o)]:
        wf = temp_df[(temp_df[gids] != -1).any(axis=1)]
        agg_df = wf.groupby('sg_id')['ind_pred']\
            .agg(['mean', 'std', 'median', 'min', 'max']).reset_index()
        agg_df.columns = ['sg_id', 'sg_mean', 'sg_std', 'sg_median',
                          'sg_min', 'sg_max']
        wf = wf.merge(agg_df)
        wf['sg_spread'] = wf['sg_max'] - wf['sg_min']

        spread_dict[nm + '_sg_mean'] = round(np.mean(wf['sg_mean']), 4)
        spread_dict[nm + '_sg_std'] = round(np.mean(wf['sg_std']), 4)
        spread_dict[nm + '_sg_median'] = round(np.mean(wf['sg_median']), 4)
        spread_dict[nm + '_sg_min'] = round(np.mean(wf['sg_min']), 4)
        spread_dict[nm + '_sg_max'] = round(np.mean(wf['sg_max']), 4)
        spread_dict[nm + '_sg_spread'] = round(np.mean(wf['sg_spread']), 4)

    ut.time(t1)
    return spread_dict
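
A toy run of the per-subgraph spread idea above: within each connected
component, the max-min gap of the independent predictions.

import pandas as pd

df = pd.DataFrame({'sg_id': [0, 0, 0, 1, 1],
                   'ind_pred': [0.2, 0.9, 0.5, 0.4, 0.45]})
agg = df.groupby('sg_id')['ind_pred'].agg(['min', 'max'])
agg['sg_spread'] = agg['max'] - agg['min']
print(agg)  # subgraph 0 is spread out, subgraph 1 is tightly clustered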
Example #60
0
def compute_big_aupr(start_fold=0, ref_start_fold=-1, num_folds=5,
                     domain='twitter', models=['ind'], in_dir='', gids=[]):
    ind_data_dir = 'independent/data/' + domain + '/'

    lines = {'ind': 'b-', 'mrf': 'g--', 'psl': 'm-.', 'mean': 'r:',
             'median': 'c:', 'max': 'y:'}
    inds, mrfs, psls, approxs, refs = [], [], [], [], []
    preds = []

    gen_obj = Generator()
    relations = _relations_for_gids(gids)

    for model in models:
        preds.append(model + '_pred')
    if 'approx' in models:
        models.remove('approx')
        models.extend(['mean', 'median', 'max'])
        preds.extend(['mean_pred', 'median_pred', 'max_pred'])
    preds = list(zip(models, preds))

    t1 = ut.out('reading true labels...', 0)
    full_df = pd.read_csv(ind_data_dir + 'comments.csv')
    lbl_df = full_df[['com_id', 'label']]
    ut.time(t1)

    s = '%s: reading model preds from fold %d to %d:'
    ut.out(s % (domain, start_fold, start_fold + num_folds - 1), 1)

    newline = 1 if 'approx' in models else 0

    d = {}
    for i, fold in enumerate(range(start_fold, start_fold + num_folds)):
        ut.out('\nreading preds for fold %d...' % i, newline)
        f_dict = {}

        if ref_start_fold > -1:
            ndx = ref_start_fold + i
            fname = in_dir + 'test_' + str(ndx) + '_preds.csv'
            assert os.path.exists(fname)
            refs.append(pd.read_csv(fname))

        if 'ind' in models:
            fname = in_dir + 'test_' + str(fold) + '_preds.csv'
            assert os.path.exists(fname)
            ind_df = pd.read_csv(fname)
            inds.append(ind_df)
            ind_lbl_df = full_df.merge(ind_df, on='com_id')
            t1 = ut.out('generating group ids...')
            for gid in gids:
                ind_lbl_df = gen_obj.gen_group_id(ind_lbl_df, gid)
            ut.time(t1)
            m_dict = _metrics(ind_lbl_df)
            a_dict = _analyze(ind_lbl_df, relations=relations, col='ind_pred')
            s_dict = _spread(ind_lbl_df, col='ind_pred', relations=relations)
            f_dict.update(a_dict)
            f_dict.update(s_dict)
            f_dict.update(m_dict)

            if 'mean' in models:
                temp_df = full_df.merge(ind_df)

                t1 = ut.out('generating group ids...')
                for gid in gids:
                    temp_df = gen_obj.gen_group_id(temp_df, gid)
                ut.time(t1)

                approx_df = _approximations(temp_df, relations)
                approxs.append(approx_df)

        if 'mrf' in models:
            fname = in_dir + 'mrf_preds_' + str(fold) + '.csv'
            assert os.path.exists(fname)
            mrf_df = pd.read_csv(fname)
            mrfs.append(mrf_df)
            mrf_lbl_df = lbl_df.merge(mrf_df)
            m_dict = _metrics(mrf_lbl_df, col='mrf_pred', model='mrf')
            f_dict.update(m_dict)

        if 'psl' in models:
            fname = in_dir + 'psl_preds_' + str(fold) + '.csv'
            assert os.path.exists(fname)
            psl_df = pd.read_csv(fname)
            psls.append(psl_df)
            psl_lbl_df = lbl_df.merge(psl_df)
            m_dict = _metrics(psl_lbl_df, col='psl_pred', model='psl')
            f_dict.update(m_dict)

        d[i] = f_dict
        print(d)

    dicts = [d[i] for i in range(len(d))]
    stats_df = pd.DataFrame(dicts)
    stats_df = stats_df.reset_index()\
                       .rename(columns={'index': 'test_set'})
    stats_df.to_csv('tw_full_0stk.csv', index=None)

    t1 = ut.out('concatenating test set predictions...')
    df = full_df[['com_id', 'label']]

    if 'ind' in models:
        ind_df = pd.concat(inds)
        df = df.merge(ind_df)

        if 'mean' in models:
            approx_df = pd.concat(approxs)
            assert set(ind_df['com_id']) == set(approx_df['com_id'])
            df = df.merge(approx_df)

    if ref_start_fold > -1:
        ref_df = pd.concat(refs)
        ref_df = full_df[['com_id', 'label']].merge(ref_df)
        ref_df = ref_df[['com_id', 'ind_pred']]
        ref_df = ref_df.rename(columns={'ind_pred': 'ref_pred'})
        assert set(ind_df['com_id']) == set(ref_df['com_id'])
        df = df.merge(ref_df)

    if 'mrf' in models:
        mrf_df = pd.concat(mrfs)
        assert set(ind_df['com_id']) == set(mrf_df['com_id'])
        df = df.merge(mrf_df)

    if 'psl' in models:
        psl_df = pd.concat(psls)
        assert set(ind_df['com_id']) == set(psl_df['com_id'])
        df = df.merge(psl_df)
    ut.time(t1)

    t1 = ut.out('applying noise to predictions...')
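    # tiny uniform jitter, presumably to break ties between identical scores
    # before computing ranking metrics; the clamp keeps values in [0, 1]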
    noise = 0.000025
    perturb = lambda x: max(0.0, min(1.0, x + ran.uniform(-noise, noise)))

    if 'ind' in models:
        df['ind_pred'] = df['ind_pred'].apply(perturb)

        if 'mean' in models:
            df['mean_pred'] = df['mean_pred'].apply(perturb)
            df['median_pred'] = df['median_pred'].apply(perturb)
            df['max_pred'] = df['max_pred'].apply(perturb)

    if 'mrf' in models:
        df['mrf_pred'] = df['mrf_pred'].apply(perturb)

    if 'psl' in models:
        df['psl_pred'] = df['psl_pred'].apply(perturb)
    ut.time(t1)

    # compute reference aupr and auroc (only if reference preds were loaded)
    if ref_start_fold > -1:
        ref_label, ref_pred = df['label'], df['ref_pred']
        ref_aupr = average_precision_score(ref_label, ref_pred)
        ref_auroc = roc_auc_score(ref_label, ref_pred)
        ref_p, ref_r, ref_t = precision_recall_curve(ref_label, ref_pred)
        ref_fpr, ref_tpr, ref_t2 = roc_curve(ref_label, ref_pred)
        ut.out('%s aupr: %.4f, auroc: %.4f' % ('reference', ref_aupr,
                                               ref_auroc))

        ut.plot_pr_curve('ref', ref_p, ref_r, ref_aupr, domain=domain,
                         line='k-', show_legend=True)
        ut.plot_roc_curve('ref', ref_tpr, ref_fpr, ref_auroc, domain=domain,
                          line='k-', show_legend=True)

    auroc_pval, aupr_pval = 0, 0
    # compute combined test set curves
    for i, (model, pred) in enumerate(preds):
        aupr = average_precision_score(df['label'], df[pred])
        auroc = roc_auc_score(df['label'], df[pred])
        p, r, _ = precision_recall_curve(df['label'], df[pred])
        fpr, tpr, _ = roc_curve(df['label'], df[pred])
        # aupr_pval, auroc_pval = _significance(df, pred)
        t = (model, aupr, aupr_pval, auroc, auroc_pval)
        ut.out('%s aupr: %.4f (%.4f), auroc: %.4f (%.4f)' % t)

        save = i == len(preds) - 1
        ut.plot_pr_curve(model, p, r, aupr, domain=domain,
                         line=lines[model], show_legend=True)
        ut.plot_roc_curve(model, tpr, fpr, auroc, save=save, domain=domain,
                          line=lines[model], show_legend=True)
    ut.out()
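
To see why compute_big_aupr jitters predictions before scoring, here is an
isolated demonstration (toy data; the noise magnitude is copied from above):
with fully tied scores, ranking metrics depend on an arbitrary ordering, and
a tiny jitter breaks the ties without materially moving any prediction.

import random as ran
from sklearn.metrics import average_precision_score

labels = [0, 1] * 50
tied = [0.5] * 100  # every score identical
noise = 0.000025
jittered = [max(0.0, min(1.0, s + ran.uniform(-noise, noise))) for s in tied]
print(average_precision_score(labels, tied))
print(average_precision_score(labels, jittered))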