Example 1
def do_prediction(self, intbl, selections, gene_names,
                  filteropt=1, filterval=1, spec_ecutoff=0.4, nonspec_ecutoff=0.35):
    '''
    intbl: preprocessed input table
    filteropt: 1 to keep the highest t-values, 2 to apply a p-value cutoff
    filterval: number of TFs to keep for option 1; p-value cutoff for option 2
    '''

    if isinstance(intbl, str):  # inittbl reported an error earlier in the pipeline
        return {'current': 1, 'total': 1, 'error': intbl}

    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()

    #while not inittask.ready():
    #    time.sleep(1)
    #intbl = inittask.get()
    predfiles = [app.config['PREDDIR'] + "/" + s for s in selections]  # full paths of the selected prediction files
    preds = [l for l in utils.chunkify(predfiles, app.config['PCOUNT']) if len(l) != 0]  # split predfiles into one non-empty chunk per process

    # load the short-to-long index map once; it is shared by all workers, so a single I/O suffices
    emap = pd.read_csv("%s/index_short_to_long.csv" % (app.config["ESCORE_DIR"]), header=0, index_col=0, sep=',', dtype='Int32')
    emap = np.array(emap[emap.columns[0]]) - 1  # zero-based numpy index array

    # ---- MULTIPROCESSING PART ----
    pool = mp.Pool(processes=app.config['PCOUNT'])
    # a Manager proxy is required so that pool workers can update this shared counter
    shared_ready_sum = mp.Manager().Value('i', 0)

    predict_partial = ft.partial(predict, **{'dataset':intbl, 'ready_count':shared_ready_sum, 'emap':emap,
            'filteropt':filteropt, 'filterval':filterval, 'spec_ecutoff':spec_ecutoff, 'nonspec_ecutoff':nonspec_ecutoff})
    async_pools = [pool.apply_async(predict_partial, (preds[i],)) for i in range(len(preds))]

    # run the job, update progress bar
    total = len(predfiles)
    while not all(p.ready() for p in async_pools):
        time.sleep(2)  # poll every 2 s instead of busy-waiting
        self.update_state(state='PROGRESS',
                          meta={'current': shared_ready_sum.value, 'total': total, 'status': 'Processing input data...'})

    res = [p.get() for p in async_pools]

    self.update_state(state='PROGRESS',
                      meta={'current': shared_ready_sum.value, 'total': total, 'status': 'post-processing'})
    print("Terminating all child processes...")
    pool.terminate()  # kill all child processes explicitly; lingering workers leak memory
    datavalues = postprocess(res, predfiles, gene_names, filteropt, filterval)

    # store the result records in MongoDB with an expiry
    savetomongo(self.request.id, datavalues.to_dict('records'), app.config['USER_DATA_EXPIRY'])
    # significance_score can be z-score or p-value depending on the out_type

    #db.expire("%s:vals:*" % self.request.id, app.config['USER_DATA_EXPIRY'])

    return {'current': shared_ready_sum.value, 'total': len(predfiles), 'status': 'Task completed!',
            'result': 'done', 'taskid': self.request.id,
            'time': (time.time() - start_time)}  # return a plain dict; jsonify cannot be applied to a Celery task result
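
Example 1's progress loop is the core trick here: a Manager().Value proxy is one of the few shared objects that can be pickled into Pool workers, which makes it a convenient cross-process progress counter. Below is a minimal, self-contained sketch of just that pattern; the names (work, chunks, ready) are illustrative, not taken from the task above.

import multiprocessing as mp
import time

def work(chunk, ready_count):
    # simulate per-chunk work, then bump the shared progress counter
    time.sleep(0.1 * len(chunk))
    ready_count.value += 1  # proxy update; not atomic, but fine for coarse progress
    return sum(chunk)

if __name__ == '__main__':
    chunks = [[1, 2], [3, 4], [5, 6]]
    ready = mp.Manager().Value('i', 0)
    with mp.Pool(processes=2) as pool:
        tasks = [pool.apply_async(work, (c, ready)) for c in chunks]
        while not all(t.ready() for t in tasks):
            time.sleep(0.5)  # poll, as do_prediction does above
            print("progress: %d/%d" % (ready.value, len(chunks)))
        print([t.get() for t in tasks])
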
Example 2
    def __init__(self, depth, threading):
        # The logger is already set up by the config; just get an instance
        logobj = logging.getLogger('eventgen')
        from eventgenconfig import EventgenAdapter
        adapter = EventgenAdapter(logobj, {
            'module': 'Queue',
            'sample': 'null'
        })
        self.logger = adapter

        # logger.info("Creating Queue of depth %d, threading %s" % (depth, threading))
        if threading == 'thread':
            self.q = PQueue.Queue(depth)  # plain queue: threads share memory within one process
        else:
            self.q = multiprocessing.Manager().Queue(depth)  # managed proxy: usable across processes

        self.depth = depth
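
The branch above exists because a plain queue.Queue is only shared between threads of a single process, while a Manager().Queue proxy can cross process boundaries. A minimal sketch of the cross-process case, with hypothetical names:

import multiprocessing

def consume(q):
    # runs in a child process; the managed queue proxy pickles cleanly
    print("got:", q.get())

if __name__ == '__main__':
    q = multiprocessing.Manager().Queue(10)  # depth-bounded, like the queue above
    p = multiprocessing.Process(target=consume, args=(q,))
    p.start()
    q.put("event")
    p.join()
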
Example 3
    def __init__(self, blocking=True, db_path=None, ncpu=1):
        """
        Init function

        Parameter
        ---------
        blocking: bool
            determines whether join() blocks or not
        db_path: str
            the string to a LevelDB for command persistence
        """
        self.__blocking = blocking
        self.__broker_queue = mp.Queue()
        self.__job_queue = mp.JoinableQueue()
        self.__pending_dict = mp.Manager().dict()
        self.__results_queue = mp.Queue()
        self.__results_queue_worker = mp.Queue()

        if db_path is None:
            tmp_db = NamedTemporaryFile(delete=False,
                                        dir=os.getcwd(),
                                        suffix=".db")
            tmp_db.close()
            self.__is_temp_db = True
            self.__db_path = tmp_db.name
        else:
            self.__is_temp_db = False
            self.__db_path = db_path

        self.__broker = _Broker(self.__broker_queue,
                                self.__job_queue,
                                self.__results_queue,
                                self.__results_queue_worker,
                                self.__pending_dict,
                                db_path=self.__db_path)
        self.__broker.daemon = False
        self.__broker.start()

        self.__worker = []
        for i in range(ncpu):
            p = _Worker(self.__broker_queue, self.__job_queue,
                        self.__results_queue_worker)
            p.daemon = False
            self.__worker.append(p)
            p.start()
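
The Manager().dict() proxy is what lets the broker and every worker see a single consistent pending-jobs table. A self-contained sketch of that mechanism (the register function is hypothetical, not the _Broker/_Worker classes above):

import multiprocessing as mp

def register(pending, key, value):
    # each child process writes into the same shared dict proxy
    pending[key] = value

if __name__ == '__main__':
    pending = mp.Manager().dict()
    procs = [mp.Process(target=register, args=(pending, i, i * i)) for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(dict(pending))  # {0: 0, 1: 1, 2: 4}
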
Example 4
    def __init__(self,
                 targets,
                 ports=range(65536),
                 threads=100,
                 timeout=3,
                 proxy_ip=["127.0.0.1", "127.0.0.1"],
                 proxy_port=[80, 80]):

        self._targets_ = targets
        self._ports_ = ports
        self._threads_ = threads
        self._timeout_ = timeout
        self._proxy_ip_ = proxy_ip
        self._proxy_port_ = proxy_port

        self._worker_pool_ = []
        self._worker_count_ = cpu_count()
        self._job_len_ = len(targets)

        self._scanners_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_, self._proxy_port_)
            for i in range(self._job_len_)
        ]

        self._scan_secure_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_[0], self._proxy_port_[0])
            for i in range(self._job_len_)
        ]

        self._scan_unsecure_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_[1], self._proxy_port_[1])
            for i in range(self._job_len_)
        ]

        self._manager_ = billiard.Manager()
        self._log_ = self._manager_.dict()
        self._proxy_log_ = self._manager_.dict()
        self._total_runtime_ = 0
Example 5
import multiprocessing
import os

from pygtftk.utils import GTFtkError
from pygtftk.utils import add_prefix_to_file
from pygtftk.utils import close_properly
from pygtftk.utils import flatten_list
from pygtftk.utils import intervals
from pygtftk.utils import make_tmp_file
from pygtftk.utils import message

# -------------------------------------------------------------------------
# TMP_FILE_POOL_MANAGER stores temporary file names.
# make_tmp_file_pool is the function that registers temporary files in
# TMP_FILE_POOL_MANAGER. Because it is a Manager().list() proxy, it can be
# updated by worker processes (unlike a plain global variable).
# -------------------------------------------------------------------------

TMP_FILE_POOL_MANAGER = multiprocessing.Manager().list()


def make_tmp_file_pool(prefix='tmp',
                       suffix='',
                       store=True,
                       dir=None):
    """
    This

    :Example:

    >>> from pygtftk.utils import make_tmp_file_pool
    >>> tmp_file = make_tmp_file_pool()
    >>> assert os.path.exists(tmp_file.name)
    >>> tmp_file = make_tmp_file_pool(prefix="pref")
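
The shared-list pattern can be exercised on its own: worker processes append to a Manager().list() proxy and the parent reads the combined result back. A minimal sketch under those assumptions (the worker function and file names are hypothetical):

import multiprocessing

def worker(i, pool):
    # each worker process appends to the shared list proxy
    pool.append("tmp_%d.txt" % i)

if __name__ == '__main__':
    tmp_file_pool = multiprocessing.Manager().list()
    procs = [multiprocessing.Process(target=worker, args=(i, tmp_file_pool))
             for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(list(tmp_file_pool))  # names registered by every worker
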
Example 6
def do_prediction(self,
                  intbl,
                  selections,
                  gene_names,
                  filteropt=1,
                  filterval=1,
                  spec_ecutoff=0.4,
                  nonspec_ecutoff=0.35):
    '''
    intbl: preprocessed input table
    filteropt: 1 to keep the highest t-values, 2 to apply a p-value cutoff
    filterval: number of TFs to keep for option 1; p-value cutoff for option 2
    '''

    if isinstance(intbl, str):  # inittbl reported an error earlier in the pipeline
        return {'current': 1, 'total': 1, 'error': intbl}

    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()

    #while not inittask.ready():
    #    time.sleep(1)
    #intbl = inittask.get()

    pool = mp.Pool(processes=app.config['PCOUNT'])
    predfiles = [app.config['PREDDIR'] + "/" + s
                 for s in selections]  # full paths of the selected prediction files
    preds = utils.chunkify(
        predfiles,
        app.config['PCOUNT'])  # split predfiles into one chunk per process

    # a Manager proxy is required so that pool workers can update this shared counter
    shared_ready_sum = mp.Manager().Value('i', 0)

    async_pools = [
        pool.apply_async(predict,
                         (preds[i], intbl, shared_ready_sum, filteropt,
                          filterval, spec_ecutoff, nonspec_ecutoff))
        for i in range(len(preds))
    ]

    # run the job, update progress bar
    total = len(predfiles)
    while not all(p.ready() for p in async_pools):
        time.sleep(2)  # poll every 2 s instead of busy-waiting
        self.update_state(state='PROGRESS',
                          meta={
                              'current': shared_ready_sum.value,
                              'total': total,
                              'status': 'Processing input data...'
                          })

    res = [p.get() for p in async_pools]
    self.update_state(state='PROGRESS',
                      meta={
                          'current': shared_ready_sum.value,
                          'total': total,
                          'status': 'post-processing'
                      })
    print("Terminate all children process..")
    pool.terminate(
    )  # terminate to kill all child processes !!! Like.. super important,
    # to avoid memory leak, seriously...
    colnames, datavalues = postprocess(res, gene_names, filteropt, filterval)
    # store the values in Redis with an expiry
    savetoredis(self.request.id, colnames, datavalues,
                app.config['USER_DATA_EXPIRY'])
    # significance_score can be z-score or p-value depending on the out_type

    #db.expire("%s:vals:*" % self.request.id, app.config['USER_DATA_EXPIRY'])

    return {
        'current': shared_ready_sum.value,
        'total': len(predfiles),
        'status': 'Task completed!',
        'result': 'done',
        'taskid': self.request.id,
        'time': (time.time() - start_time)
    }  # return a plain dict; jsonify cannot be applied to a Celery task result
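
Examples 1 and 6 differ mainly in how arguments reach predict: Example 1 binds the run-wide keyword arguments once with functools.partial, while Example 6 repeats them positionally in every apply_async call. A minimal sketch of the partial-binding variant (predict here is a stand-in, not the real predictor):

import functools as ft
import multiprocessing as mp

def predict(files, dataset=None, ready_count=None):
    # 'files' varies per task; 'dataset' and 'ready_count' are fixed for the run
    ready_count.value += 1
    return [(f, dataset) for f in files]

if __name__ == '__main__':
    ready = mp.Manager().Value('i', 0)
    predict_partial = ft.partial(predict, dataset='intbl', ready_count=ready)
    with mp.Pool(processes=2) as pool:
        tasks = [pool.apply_async(predict_partial, (chunk,))
                 for chunk in (['a'], ['b', 'c'])]
        print([t.get() for t in tasks])
    print("chunks done:", ready.value)
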
Example 7
def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce,
         engines):
    bruteforce_list = set()
    search_list = set()

    if is_windows:
        subdomains_queue = list()  # plain list suffices when engines share one process
    else:
        subdomains_queue = billiard.Manager().list()  # shared proxy for engine processes

    # Check Bruteforce Status
    if enable_bruteforce or enable_bruteforce is None:
        enable_bruteforce = True

    # Validate domain
    domain_check = re.compile(
        r"^(http|https)?[a-zA-Z0-9]+([\-\.][a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
    if not domain_check.match(domain):
        if not silent:
            print(R + "Error: Please enter a valid domain" + W)
        return []

    if not domain.startswith('http://') and not domain.startswith('https://'):
        domain = 'http://' + domain

    parsed_domain = urlparse.urlparse(domain)

    if not silent:
        print(B +
              "[-] Enumerating subdomains now for %s" % parsed_domain.netloc +
              W)

    if verbose and not silent:
        print(
            Y +
            "[-] verbosity is enabled, will show the subdomains results in realtime"
            + W)

    supported_engines = {
        'baidu': BaiduEnum,
        'yahoo': YahooEnum,
        'google': GoogleEnum,
        'bing': BingEnum,
        'ask': AskEnum,
        'netcraft': NetcraftEnum,
        'dnsdumpster': DNSdumpster,
        'virustotal': Virustotal,
        'threatcrowd': ThreatCrowd,
        'ssl': CrtSearch,
        'passivedns': PassiveDNS
    }

    chosenEnums = []

    if engines is None:
        chosenEnums = [
            BaiduEnum, YahooEnum, GoogleEnum, BingEnum, AskEnum, NetcraftEnum,
            DNSdumpster, Virustotal, ThreatCrowd, CrtSearch, PassiveDNS
        ]
    else:
        engines = engines.split(',')
        for engine in engines:
            if engine.lower() in supported_engines:
                chosenEnums.append(supported_engines[engine.lower()])

    # Start the engines enumeration
    enums = [
        enum(domain, [], q=subdomains_queue, silent=silent, verbose=verbose)
        for enum in chosenEnums
    ]
    for enum in enums:
        enum.start()
    for enum in enums:
        enum.join()

    subdomains = set(subdomains_queue)
    for subdomain in subdomains:
        search_list.add(subdomain)

    if enable_bruteforce:
        if not silent:
            print(G + "[-] Starting bruteforce module now using subbrute.." +
                  W)
        record_type = False
        path_to_file = os.path.dirname(os.path.realpath(__file__))
        subs = os.path.join(path_to_file, 'subbrute', 'names.txt')
        resolvers = os.path.join(path_to_file, 'subbrute', 'resolvers.txt')
        process_count = threads
        output = False
        json_output = False
        bruteforce_list = subbrute.print_target(parsed_domain.netloc,
                                                record_type, subs, resolvers,
                                                process_count, output,
                                                json_output, search_list,
                                                verbose)

    subdomains = search_list.union(bruteforce_list)

    if subdomains:
        subdomains = sorted(subdomains, key=subdomain_sorting_key)

        if savefile:
            write_file(savefile, subdomains)

        if not silent:
            print(Y +
                  "[-] Total Unique Subdomains Found: %s" % len(subdomains) +
                  W)

        if ports:
            if not silent:
                print(G +
                      "[-] Start port scan now for the following ports: %s%s" %
                      (Y, ports) + W)
            ports = ports.split(',')
            pscan = portscan(subdomains, ports)
            pscan.run()

        elif not silent:
            for subdomain in subdomains:
                print(G + subdomain + W)
    return subdomains
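
main() swaps in a plain list on Windows, where the engines presumably run in threads rather than separate processes, so either container only needs append() and iteration. A sketch of that fallback plus the final set() de-duplication, using multiprocessing in place of billiard (Celery's fork, which mirrors this API); all names are illustrative:

import multiprocessing
import platform

def scan(engine, results):
    # each enumeration engine appends its findings to the shared container
    results.append("sub-%s.example.com" % engine)

if __name__ == '__main__':
    on_windows = platform.system() == 'Windows'
    results = list() if on_windows else multiprocessing.Manager().list()
    if on_windows:
        # simplified fallback: run the engines sequentially in-process
        for engine in ('engine1', 'engine2'):
            scan(engine, results)
    else:
        procs = [multiprocessing.Process(target=scan, args=(e, results))
                 for e in ('engine1', 'engine2')]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
    print(set(results))  # deduplicate, as main() does with subdomains_queue
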