def get(self, wiki_id, brand):
        '''
        :param wiki_id: string
        :param brand: string name of brand
        '''
        page_doc_response = ListDocIdsService().get(wiki_id)
        if page_doc_response['status'] != 200:
            return page_doc_response

        if USE_MULTIPROCESSING:
            m = Manager()
            d = m.list()
            l = m.list()
            done_ids = dict([(line.split(',')[0], True) for line in open('data_output.json', 'r')])
            print len(done_ids)
            doc_ids = filter(lambda x: not done_ids.get(x, False), page_doc_response[wiki_id])
            # keep the flattened worker results so `sents` is defined for the return below
            sents = [a for b in Pool(processes=MP_NUM_CORES).map(add_brand_sent_sentiment, [(d, l, i, brand) for i in doc_ids]) for a in b]
        else:
            ses = SentencesForEntityService()
            total = len(page_doc_response[wiki_id])
            counter = 0
            sents = []
            for doc_id in page_doc_response[wiki_id]:
                resp = ses.get(doc_id, brand)
                sents.append((doc_id, resp.get(brand, [])))
                counter += 1
                print "%d / %d" % (counter, total)

        return {'status': 200, brand: dict(sents)}
 def scanner_network(self,gateway):
     scan = ''
     config_gateway = gateway.split('.')
     del config_gateway[-1]
     for i in config_gateway:
         scan += str(i) + '.'
     gateway = scan
     ranger = str(self.ip_range.text()).split('-')
     jobs = []
     manager = Manager()
     on_ips = manager.dict()
     for n in xrange(int(ranger[0]),int(ranger[1])):
         ip='%s{0}'.format(n)%(gateway)
         p = Process(target=self.working,args=(ip,on_ips))
         jobs.append(p)
         p.start()
     for i in jobs: i.join()
     for i in on_ips.values():
         Headers = []
         n = i.split('|')
         self.data['IPaddress'].append(n[0])
         self.data['MacAddress'].append(n[1])
         self.data['Hostname'].append('<unknown>')
         for n, key in enumerate(reversed(self.data.keys())):
             Headers.append(key)
             for m, item in enumerate(self.data[key]):
                 item = QTableWidgetItem(item)
                 item.setTextAlignment(Qt.AlignVCenter | Qt.AlignCenter)
                 self.tables.setItem(m, n, item)
     Headers = []
     for key in reversed(self.data.keys()):
         Headers.append(key)
     self.tables.setHorizontalHeaderLabels(Headers)
Example #3
def getData():
    if os.path.isfile("chat_urls.p"):
        chat_urls = pickle.load( open( "chat_urls.p", "rb" ) )
    else:
        chat_urls = {}
        for user in users:
            chat_urls[user] = get_urls(user)
        teams_url = "http://espn.go.com/mlb/teams"
        pickle.dump( chat_urls, open( "chat_urls.p", "wb" ) )

    # for user in chat_urls:
    #     urls = chat_urls[user]
    #     for url in urls:
    #         getLog(url)
    logDB = {}
    for user in chat_urls:
        logDB[user] = {}
    p = Pool(20)
    i=0
    manager = Manager()
    db = manager.dict()
    for user in chat_urls:
        for url in chat_urls[user]:
            i+=1
            p.apply_async(addLogData, args=(url,db))
    p.close()
    p.join()
    out = db._getvalue()
    outfile = open("rawChat.txt","wb")
    for url in out:
        outfile.write(out[url]+"\n")
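
# A minimal standalone sketch of the apply_async + Manager-dict pattern used in
# getData() above. addLogData here is a stand-in for the real scraper, and the
# URLs are placeholders; only the sharing pattern matches the snippet.
from multiprocessing import Pool, Manager

def addLogData(url, db):
    # stand-in worker: record a fake log entry for the url in the shared dict
    db[url] = "log for %s" % url

if __name__ == '__main__':
    manager = Manager()
    db = manager.dict()
    pool = Pool(4)
    for url in ["chat/1", "chat/2", "chat/3"]:
        pool.apply_async(addLogData, args=(url, db))
    pool.close()
    pool.join()
    print(dict(db))  # plain-dict copy; the snippet above uses db._getvalue()
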
Example #4
def controller_failure_unit_test():
    s = ["1001"]
    s1 = ["1002"]
    clear_config(s)
    clear_config(s1)
    manager1 = Manager()
    manager2 = Manager()
    failure1 = manager1.Value('i', 0)
    failed_list1 = manager1.list([])

    failure2 = manager2.Value('i', 0)
    failed_list2 = manager2.list([])
    processes = []
    process2 = mp.Process(target=controller_failure_detection, args=(s, '1', failure1, failed_list1,))
    processes.append(process2)
    process4 = mp.Process(target=controller_failure_detection, args=(s, '2', failure2, failed_list2,))
    processes.append(process4)
    for p in processes:
        p.start()
        print 'STARTING:', p, p.is_alive()
    r = random.randint(1, 10)
    time.sleep(r)
    print 'terminated'
    t1 = time.time()
    logging.debug(str( ["controller failed at:"] + [t1]))
    processes[0].terminate()
# Exit the completed processes
    for p in processes:
        p.join()
        print 'JOINED:', p, p.is_alive()
def processFiles(patch_dir):
    root = os.getcwd()
    glbl.data_dirs = {}
    if root != patch_dir: working_path = root+"/"+patch_dir
    else: working_path = root

    for path, dirs, files in os.walk(working_path):
        if len(dirs) == 0: glbl.data_dirs[path] = ''
    

    # Multiprocessing Section
    #########################################
    Qids = glbl.data_dirs.keys()
    manager = Manager()                                      # creates shared memory manager object
    results = manager.dict()                                 # Add dictionary to manager, so it can be accessed across processes
    nextid = Queue()                                         # Create Queue object to serve as shared id generator across processes
    for qid in Qids: nextid.put(qid)                         # Load the ids to be tested into the Queue
    for x in range(0,multiprocessing.cpu_count()):           # Create one process per logical CPU
        p = Process(target=processData, args=(nextid,results)) # Assign process to the processData function, passing in the Queue and shared dictionary
        glbl.jobs.append(p)                                   # Add the process to a list of running processes
        p.start()                                             # Start process running
    for j in glbl.jobs:
        j.join()                                              # For each process, join them back to main, blocking on each one until finished
    
    # write out results
    c = 1
    sets = results.keys()
    sets.sort()
    for x in sets:
        if results[x] != 'None':
            FINAL = open('result'+str(c)+'.txt','w')
            n = "\n************************************************************************************************\n"
            FINAL.write(n+"* "+x+'    *\n'+n+results[x]+"\n")
            FINAL.close()     
            c += 1
Example #6
    def __init__(self, firefox=None, email=None, senha=None, pasta=None):
        """'firefox' é o caminho para o binário do Firefox a ser usado.
        'pasta' é o caminho para a pasta onde salvar os downloads."""
        self.firefox = firefox
        self.pasta = pasta
        self.email = email
        self.senha = senha

        self.navegador = None
        self.app = None
        self.logger = None

        manager = Manager()
        self.safe_dict = manager.dict()
        self.clear_captcha()
        self.stop()

        self.try_break_audio_captcha = True
        self.nome_audio_captcha = "somCaptcha.wav"
        self.recognizer = sr.Recognizer(str('pt-BR'))

        self.user_agent = (
            "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:28.0)"
            " Gecko/20100101  Firefox/28.0"
        )
        self.base_url = 'http://esic.prefeitura.sp.gov.br'
        self.login_url = self.base_url + '/Account/Login.aspx'

        self.logado = False
        self.ja_tentou_cookies_salvos = False
        self.rodar_apenas_uma_vez = False
Example #7
 def correction_terms_threaded(self):
     '''Finds the correction terms associated to the quadratic form;
     for each of the equivalence classes it finds the maximum by
     iterating through the relation vectors of the group.
     
     Uses multiprocessing.'''
     print 'Using multiprocessing'
     pool = Pool() # default: processes=None => uses cpu_count()
     manager = Manager()
     start_time = time.time()
     coef_lists = lrange(self.group.structure)
     # representatives = elements of C_1(V) (np.matrix)
     representatives = map(lambda l: self.find_rep(l), coef_lists)
     # list of maxes        
     lst = manager.list([None for i in xrange(len(representatives))]) 
     alphalist = list(self.get_alpha()) # cannot pickle generators
     pool.map_async(functools.partial(process_alpha_outside, self, 
                                      representatives, lst), alphalist)
     pool.close()
     pool.join() # wait for pool to finish
     # get corrterms via (|alpha|^2+b)/4
     print 'Computed from quadratic form in %g seconds' \
           % (time.time() - start_time)
     return [Fraction(Fraction(alpha, self.int_inverse[1]) + self.b, 4) \
                     for alpha in lst]            
Example #8
class MemStorage:
    def __init__(self, config):
        self.config = config
        self.measures = Manager().list()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def save(self, measure):
        self.measures.append(measure)

    def last(self):
        if len(self.measures) <= 0:
            return None

        return self.measures[-1]

    def __str__(self):
        buf = "<{} measures: [".format(self.__class__)
        for item in self.measures:
            buf += "'{}'".format(item)
        buf += "]>"

        return buf
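
# A minimal usage sketch for MemStorage above: several worker processes append
# to the same Manager-backed list through the storage object. The config dict
# and the worker function are placeholders.
from multiprocessing import Process

def _record(storage, value):
    storage.save(value)

if __name__ == '__main__':
    with MemStorage(config={}) as storage:
        procs = [Process(target=_record, args=(storage, i)) for i in range(4)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        print(storage.last())  # one of the measures written by the workers
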
def main():
    init_params()
    vk = connect_to_vk(LOGIN, PASSWORD)
    audio_list = vk.method('audio.get', {})

    total = len(audio_list)

    if not os.path.exists(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR)

    manager = Manager()
    workers_list = []
    progress_list = manager.dict()
    downloaded_tracks = manager.Value('i', 0)
    lock = Lock()

    for f in audio_list[:WORKERS_COUNT - 1]:
        start_download_process(f, workers_list, progress_list, downloaded_tracks, lock)

    del audio_list[:WORKERS_COUNT - 1]

    while any(worker.is_alive() for worker in workers_list) or len(audio_list):
        if audio_list and len(workers_list) < WORKERS_COUNT:
            f = audio_list.pop(0)
            start_download_process(f, workers_list, progress_list, downloaded_tracks, lock)
        print_progress(progress_list, downloaded_tracks.value, total)
        clean_workers(workers_list)
        time.sleep(0.1)
    print "Done!"
Example #10
    def multiupload(self, filename, hosts):
        """Upload file to multiple hosts simultaneously

        The upload will be attempted for each host until the optimal file
        redundancy is achieved (a percentage of successful uploads) or the host
        list is depleted.

        Args:
            filename (str): The filename of the file to upload.
            hosts (list): A list of hosts as defined in the master host list.
        Returns:
            A list of dicts with 'host_name' and 'url' keys for all successful
            uploads or an empty list if all uploads failed.

        """
        manager = Manager()
        successful_uploads = manager.list([])

        def f(host):
            if len(successful_uploads)/float(len(hosts)) < settings.MIN_FILE_REDUNDANCY:
                # Optimal redundancy not achieved, keep going
                result = self.upload_to_host(filename, host)
                if 'error' in result:
                    self._host_errors[host] += 1
                else:
                    successful_uploads.append(result)

        multiprocessing.dummy.Pool(len(hosts)).map(f, self._hosts_by_success(hosts))

        return list(successful_uploads)
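
# Standalone sketch of the pattern used by multiupload() above: a thread pool
# writes successful results into a Manager-backed list and stops issuing
# uploads once a redundancy threshold is met. fake_upload stands in for
# upload_to_host(), and 0.5 is a placeholder, not settings.MIN_FILE_REDUNDANCY.
import multiprocessing.dummy
from multiprocessing import Manager

def fake_upload(filename, host):
    # hypothetical stand-in that always succeeds
    return {'host_name': host, 'url': 'http://%s/%s' % (host, filename)}

def upload_with_redundancy(filename, hosts, min_redundancy=0.5):
    successful = Manager().list()

    def try_host(host):
        # skip further uploads once enough hosts have succeeded
        if len(successful) / float(len(hosts)) < min_redundancy:
            result = fake_upload(filename, host)
            if 'error' not in result:
                successful.append(result)

    multiprocessing.dummy.Pool(len(hosts)).map(try_host, hosts)
    return list(successful)

if __name__ == '__main__':
    print(upload_with_redundancy('report.pdf', ['host-a', 'host-b', 'host-c']))
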
Example #11
    def download(self, sources, output_directory, filename):
        """Download a file from one of the provided sources

        The sources will be ordered by least amount of errors, so most
        successful hosts will be tried first. In case of failure, the next
        source will be attempted, until the first successful download is
        completed or all sources have been depleted.

        Args:
            sources: A list of dicts with 'host_name' and 'url' keys.
            output_directory (str): Directory to save the downloaded file in.
            filename (str): Filename assigned to the downloaded file.
        Returns:
            A dict with 'host_name' and 'filename' keys if the download is
            successful, or an empty dict otherwise.

        """
        valid_sources = self._filter_sources(sources)
        if not valid_sources:
            return {'error': 'no valid sources'}

        manager = Manager()
        successful_downloads = manager.list([])

        def f(source):
            if not successful_downloads:
                result = self.download_from_host(source, output_directory, filename)
                if 'error' in result:
                    self._host_errors[source['host_name']] += 1
                else:
                    successful_downloads.append(result)

        multiprocessing.dummy.Pool(len(valid_sources)).map(f, valid_sources)

        return successful_downloads[0] if successful_downloads else {}
def concurrent_test(robot, rooms, num_trials, start_location = -1, chromosome = None):
    """
    Run the tests in multiple processes. Can be directly swapped out for testAllMaps.
    """
    # Setup variables
    num_rooms    = len(rooms)               # Total number of rooms
    total_trials = num_trials * num_rooms   # Total number of trials
    processes    = []                       # List for all processes
    manager      = Manager()                # Manager to handle result transfer
    dict         = manager.dict()           # Dict which will store results
    
    # Create a process for each room, storing parameters in instance variables
    for i, room in enumerate(rooms):
        process = SimulationProcess(i, dict)
        process.robot          = robot
        process.room           = room
        process.num_trials     = num_trials
        process.start_location = start_location
        process.chromosome     = chromosome
        process.start()
        processes.append(process)
    #end for

    # Print the results
    total_score = 0
    for i, process in enumerate(processes):
        process.join()
        (score, std) = dict[i]
        print("Room %d of %d done (score: %d std: %d)" % (i + 1, num_rooms, score, std))
        total_score += score
    #end for
    
    print("Average score over %d trials: %d" % (total_trials, total_score / num_rooms))
    return total_score / num_rooms
#end concurrent_test
Example #13
def run(args):
    # Limit it to a single GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    conn = create_db(args.db)
    m = Manager()

    logs = args.logging
    datasets = args.datasets
    embeddings = args.embeddings
    settings = args.settings

    # So we don't litter the fs
    dir_ = tempfile.mkdtemp(prefix='baseline-speed-test-')

    try:
        configs = get_configs(args.config)
        if not args.single:
            full_configs = []
            for config in configs:
                full_configs.extend(edit_config(config, args.frameworks, args.no_crf, args.no_attn))
            configs = full_configs
        if args.verbose:
            for config in configs:
                pprint(config)
                print()
            print()
        steps = len(configs)
        pg = create_progress_bar(steps)
        for config in configs:
            write_config = deepcopy(config)
            config['train']['epochs'] = args.trials
            task_name = config['task']

            system_info = m.dict()
            p = Process(
                target=run_model,
                args=(
                    system_info,
                    config,
                    logs,
                    settings,
                    datasets,
                    embeddings,
                    task_name,
                    dir_,
                    int(args.gpu)
                )
            )
            p.start()
            pid = p.pid
            p.join()
            log_file = os.path.join(dir_, 'timing-{}.log'.format(pid))
            speeds = parse_logs(log_file)

            save_data(conn, speeds, write_config, system_info)
            pg.update()
        pg.done()
    finally:
        shutil.rmtree(dir_)
Example #14
 def run_multiprocesses_likelihood(self):
     lik = 0.0
     workers = []
     workers_no = self.configuration.num_threads
     corpusSplitlist = self.split_average_data(workers_no)
     
     likmanager = Manager()
     ManagerReturn_corpusSplitlist = []
     ManagerReturn_corpusSplitlist_lik = []
     for dataSplit in corpusSplitlist:
         likreturn_dataSplit = likmanager.list()
         likreturn_dataSplit_likvalue = likmanager.Value("",0.0)
         worker = Process(target=self.splitlikelihood, args=(dataSplit, likreturn_dataSplit, likreturn_dataSplit_likvalue))
         worker.start()
         workers.append(worker)
         ManagerReturn_corpusSplitlist.append(likreturn_dataSplit)
         ManagerReturn_corpusSplitlist_lik.append(likreturn_dataSplit_likvalue)
     for w in workers:
         w.join()
     
     # compute all the likelihood for the splits:
     for v in ManagerReturn_corpusSplitlist_lik:
         lik += v.value
     # update all the docs into corpus, since we compute the doc distribution in likelihood()
     self.corpus.clear()
     for dataSplit in ManagerReturn_corpusSplitlist:
         for doc in dataSplit:
             self.corpus.append(doc)
     
     return lik
Example #15
    def __init__(self,port):
        manager = Manager()
        self.status=manager.dict()
        self.sendbuf=manager.list()
        self.p = Process(target=SocketManager, args=(port,self.status,self.sendbuf) )
        self.p.daemon = True
        self.p.start()
Example #16
  def record_metrics(self, input_file):    
    mgr = Manager()
    metrics_data = mgr.list()

    procs = []
    for i in range(self.metrics):
      p = Process(target=self.record_metric, args=(input_file, metrics_data))
      p.start()
      procs.append(p)

      if len(procs) >= self.procs:
        for p in procs:
          p.join()
        procs = []
    
    for p in procs:
      p.join()

    l = set()
    for metric in metrics_data:
      if self.non_uniques:
        l.add(metric.bbs)
      else:
        l.add(metric.unique_bbs)

    self.stats["min"] = min(l)
    self.stats["max"] = max(l)
    self.stats["avg"] = reduce(lambda x, y: x + y, l) / float(len(l))

    self.original_stats = dict(self.stats)
    
    self.print_statistics()
Example #17
def func_thread():
    a = numpy.random.rand(1000000)
    b = numpy.random.rand(1000000)

    nodata = 0.3

    print "here"
    manager = Manager()
    lock = Lock()
    d = manager.dict()
    ps = []
    start_time = time.clock()
    for i in numpy.where((a > 0.7) & (a < 0.9) & (a != nodata)):
        for j in numpy.where((b > 0.5) & (b < 0.9) & (b != nodata)):

            index = numpy.intersect1d(i, j)
            length = len(index)/2
            array1 = index[:length]
            array2 = index[length:]
            for processes in range(2):
                p = Process(target=f_thread, args=(d, a, b, array1, lock))
                ps.append(p)
                p.start()

            for p in ps:
                p.join()

    print time.clock() - start_time, "seconds"
    print len(d)
Example #18
def main():
    if len(sys.argv) > 1:
        print "cmd arg to set directory to: " + sys.argv[1]
        os.chdir(sys.argv[1])

    print "cwd is: " + os.getcwd()

    # make sure we have the correct device

    keepTrying = True
    countCurrent = 0
    countCurrentFail = 0

    manager = Manager()
    sharedDictionary = manager.dict()

    while keepTrying:
        serial0 = serial.Serial("/dev/ttyACM0")  # connection to arduino1
        serial1 = serial.Serial("/dev/ttyACM1")  # connection to arduino2

        try:
            line = serial0.readline()  # read arduino about once every two seconds
            I = float(line.split(" ")[1].strip())  # get the current reading
            countCurrent += 1
        except Exception, e:
            countCurrentFail += 1

        if countCurrent > countCurrentFail + 5:  # 5 good readings
            keepTrying = False
            startThreading(sharedDictionary, serial0, serial1)
        elif countCurrentFail > countCurrent + 5:  # 5 bad readings, do a swap
            keepTrying = False
            startThreading(sharedDictionary, serial1, serial0)

        print " . " + str(countCurrent) + "-" + str(countCurrentFail)
    def timeout_iterator(iterator):
        """Wraps an iterator and makes it timeout after time ``timeout``.

        Parameters
        ----------
        iterator : iterator

        Returns
        -------
        timeout_iterator : iterator
        """
        buffer_ = Manager().Queue()

        process = Process(
            target=partial(map),
            args=(buffer_.put, iterator)
        )
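        # map() is eager in Python 2, so the child process drains `iterator`,
        # putting each item on the shared queue until it finishes or is killed.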

        process.start()
        process.join(timeout)
        process.terminate()

        buffer_.put(QueueStop())

        timeout_iterator = iter(buffer_.get, QueueStop())

        return timeout_iterator
Example #20
class LockingSession(object):
    def __init__(self, dataman, session_filename):
        self.dataman = dataman
        self.session_filename = session_filename
        self.lock = Manager().Lock()

    def acquire(self):
        self.lock.acquire()
        self.session = DataManager.shelf(self.session_filename)

    def release(self):
        self.session.close()
        self.session = None
        self.lock.release()

    def __getitem__(self, item):
        self.acquire()
        ret = self.session[item]
        self.release()
        return ret

    def __setitem__(self, item, value):
        self.acquire()
        self.session[item] = value
        self.release()
Example #21
def sync():
    from multiprocessing import Manager
    from common import bounty, settings, peers
    from common.safeprint import safeprint
    man = Manager()
    items = {'config':man.dict(),
             'peerList':man.list(),
             'bountyList':man.list(),
             'bountyLock':bounty.bountyLock,
             'keyList':man.list()}
    items['config'].update(settings.config)
    items['peerList'].extend(peers.peerlist)
    items['bountyList'].extend(bounty.bountyList)
    safeprint(items)
    safeprint(items.get('bountyList'))
    safeprint(items.get('keyList'))
    if items.get('config') is not None:
        from common import settings
        settings.config = items.get('config')
    if items.get('peerList') is not None:
        global peerList
        peers.peerlist = items.get('peerList')
    if items.get('bountyList') is not None:
        from common import bounty
        bounty.bountyList = items.get('bountyList')
    if items.get('bountyLock') is not None:
        from common import bounty
        bounty.bountyLock = items.get('bountyLock')
    return items
def pricing(dual):
    '''Process for getting new columns.'''
    cpus = cpu_count() - int(argv[2])
    final = pow(2, K)
    if K < 23:
        section = final
    else:
        section = 100 * cpus # value to tune
    to = 0
    since = 1
    manager = Manager()
    elements = manager.list([RETAILERS, DCS, PLANTS])
    out = manager.Queue() # queue with the result from each worker
    while to < final:
        p = Pool(cpus)
        to = min(since + section, final)
        boss = p.apply_async(coordinator, (out,))
        workers = [p.apply_async(work, (k, elements, dual, out))  for k in xrange(since, to)]
        enviados = 0
        for w in workers:
            enviados += w.get()
        out.put('ok')
        a = boss.get()
        assert a.counter == enviados
        since = to + 1
        p.terminate()
    return a
Example #23
def solve(iterations, proc_count):

    queue = JoinableQueue()
    partition = get_iterations_partition(iterations, proc_count)
    for iteration in partition:
        queue.put(iteration)
    for i in range(proc_count):
        queue.put(None)

    manager = Manager()
    result = manager.list()
    processes = []

    cur_time = time.time()
    for i in range(proc_count):
        proc = Process(target=worker, args=(queue, result,))
        proc.start()
        processes.append(proc)

    queue.join()
    for proc in processes:
        proc.join()

    cur_time = time.time() - cur_time
    print_results(cur_time, result, iterations)
Example #24
    def spawn(self, n=GAME_CT):
        # Fallback on import error or single core
        try:
            from multiprocessing import Process, Manager, cpu_count
        except ImportError:
            return self.run(n)

        # For low n multiprocessing does not gain much speed up
        if cpu_count() <= 1 or n < 500:
            return self.run(n)

        m = Manager()
        self.results = m.list()
        procs = []
        load = [n // cpu_count()] * cpu_count()

        # add the rest from division to last cpu
        load[-1] += n % cpu_count()

        for count in load:
            proc = Process(target=self.run, args=(count,))
            proc.start()
            procs.append(proc)

        [p.join() for p in procs]
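
# The per-CPU load split in spawn() above, reduced to a standalone sketch.
# run() here is a stand-in for the instance method, and the Manager list plays
# the role of self.results.
from multiprocessing import Process, Manager, cpu_count

def run(count, results):
    # stand-in worker: pretend to simulate `count` games and report the count
    results.append(count)

if __name__ == '__main__':
    n = 1000
    results = Manager().list()
    load = [n // cpu_count()] * cpu_count()
    load[-1] += n % cpu_count()  # remainder from the division goes to the last CPU
    procs = [Process(target=run, args=(count, results)) for count in load]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    assert sum(results) == n
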
Example #25
def run():
    # build the mdp
    start = time.time()
    room_size = 3
    num_rooms = 5
    mdp = maze_mdp.MazeMDP(room_size=room_size, num_rooms=num_rooms)

    # build the agent
    m = Manager()
    init_dict = {(s, a): 0 for s in mdp.states for a in mdp.ACTIONS + [None]}
    shared_weights = m.dict(init_dict)
    shared_value_weights = m.dict(init_dict)
    agent = async_actor_critic.AsyncActorCritic(actions=mdp.ACTIONS, discount=mdp.DISCOUNT, 
        weights=shared_weights, value_weights=shared_value_weights, tau=.3, learning_rate=.5)

    # build a single experiment
    rewards = m.list()
    start_state_values = m.list()
    max_steps = (2 * room_size * num_rooms) ** 2
    exp = experiment.Experiment(mdp=mdp, agent=agent, num_episodes=800, max_steps=max_steps,
        rewards=rewards, start_state_values=start_state_values)

    # run the experiment
    multiexperiment = experiment.MultiProcessExperiment(experiment=exp, num_agents=NUM_PROCESSES)
    multiexperiment.run()

    # report results
    end = time.time()
    print 'took {} seconds to converge'.format(end - start)
    mdp.print_state_values(shared_value_weights)
    optimal = mdp.EXIT_REWARD + (2 * room_size * num_rooms * mdp.MOVE_REWARD)
    utils.plot_values(rewards, optimal, 'rewards')
    utils.plot_values(start_state_values, optimal, 'start state value')
 def scanner_network(self,gateway):
     get_ip = len(gateway)-1
     gateway = gateway[:get_ip]
     ranger = str(self.ip_range.text()).split("-")
     self.control = True
     jobs = []
     manager = Manager()
     on_ips = manager.dict()
     for n in xrange(int(ranger[0]),int(ranger[1])):
         ip="%s{0}".format(n)%(gateway)
         p = Process(target=self.working,args=(ip,on_ips))
         jobs.append(p)
         p.start()
     for i in jobs: i.join()
     for i in on_ips.values():
         Headers = []
         n = i.split("|")
         self.data['IPaddress'].append(n[0])
         self.data['MacAddress'].append(n[1])
         self.data['Hostname'].append("<unknown>")
         for n, key in enumerate(reversed(self.data.keys())):
             Headers.append(key)
             for m, item in enumerate(self.data[key]):
                 item = QTableWidgetItem(item)
                 item.setTextAlignment(Qt.AlignVCenter | Qt.AlignCenter)
                 self.tables.setItem(m, n, item)
         self.scanner_OFF(False,"txt_status")
     Headers = []
     for key in reversed(self.data.keys()):
         Headers.append(key)
     self.tables.setHorizontalHeaderLabels(Headers)
 def __init__(self):
     manager = Manager()
     
     self.flow_to_state_map = manager.dict()
     self.flow_to_state_map.clear()
     self.trigger = manager.Value('i', 0)
     self.comp = manager.Value('i', 0) # sequential = 0, parallel = 1 
Example #28
def multi_download(url_and_name_list, num_threads=8):
    ''' accepts list of tuples, where t[0] = url and t[1] = filename '''
    manager = Manager()

    #pylint: disable=no-member
    m_list = manager.list()
    #pylint: enable=no-member
    log = logging.getLogger('multi_dl')
    log.debug('starting pool with ' + str(num_threads) + ' workers')

    monitor_thread = Process(target=download_monitor,
            args=((m_list, len(url_and_name_list)),))

    monitor_thread.start()
    workers = Pool(processes=num_threads)
    work = workers.map_async(single_download,
            zip(url_and_name_list, repeat(m_list)))

    # this hack makes the async_map respond to ^C interrupts
    try:
        work.get(0xFFFF)
        monitor_thread.join()
        sys.stdout.write('\n\n')
    except KeyboardInterrupt:
        print 'parent received control-c'
        exit()
Example #29
def crackTicket(ticket, label, hashList):
	try:
		data = base64.b64decode(ticket)
	except:
		#print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
		return "FAIL" + str(label) + "\n"
	
	manager = Manager()
	enctickets = manager.list()

	if data[0] == '\x76':
		try:
			enctickets.append((str(decoder.decode(data)[0][2][0][3][2])))
		except:
			#print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
			return "FAIL" + str(label)
	elif data[:2] == '6d':
		for ticket in data.strip().split('\n'):
			try:
				enctickets.append((str(decoder.decode(ticket.decode('hex'))[0][4][3][2])))
			except:
				#print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
				return "FAIL" + str(label)

	print "\nAccount: " + label

	for currentHash in hashList:
		ntlmHash_hex = binascii.unhexlify(currentHash)
		kdata, nonce = kerberos.decrypt(ntlmHash_hex, 2, enctickets[0])
		if kdata:
			print "NTLM Hash: " + currentHash
			break

	return ""
Example #30
def aggress(map):
    global startMap
    startMap = map

    #print "Regressing..."
    state = State()

    jobs = []

    longestSolution = Value('d', 20)
    highestScore = Value('d', 0)

    queue = JoinableQueue()

    manager = Manager()

    d = manager.dict()
    d.clear()

    l = RLock()

    if multiProc:
        queue.put((state, map, 1))

        for i in range(numProcs):
           p = Process(target = multiMain, args=(startMap, l, d, queue,highestScore))
           p.start()

        queue.join()
    else:
        a(l, highestScore, d, None, state, map, 1)
Example #31
def main():
    # Hard-coded parameters needed for USGS datasets
    usgs_product_dict = {
        "ned": {
            "product": "National Elevation Dataset (NED)",
            "dataset": {
                "ned1sec": (1.0 / 3600, 30, 100),
                "ned13sec": (1.0 / 3600 / 3, 10, 30),
                "ned19sec": (1.0 / 3600 / 9, 3, 10),
            },
            "subset": {},
            "extent": ["1 x 1 degree", "15 x 15 minute"],
            "format": "IMG",
            "extension": "img",
            "zip": True,
            "srs": "wgs84",
            "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs",
            "interpolation": "bilinear",
            "url_split": "/",
        },
        "nlcd": {
            "product": "National Land Cover Database (NLCD)",
            "dataset": {
                "National Land Cover Database (NLCD) - 2001":
                (1.0 / 3600, 30, 100),
                "National Land Cover Database (NLCD) - 2006":
                (1.0 / 3600, 30, 100),
                "National Land Cover Database (NLCD) - 2011":
                (1.0 / 3600, 30, 100),
            },
            "subset": {
                "Percent Developed Imperviousness",
                "Percent Tree Canopy",
                "Land Cover",
            },
            "extent": ["3 x 3 degree"],
            "format": "GeoTIFF",
            "extension": "tif",
            "zip": True,
            "srs": "wgs84",
            "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs",
            "interpolation": "nearest",
            "url_split": "/",
        },
        "naip": {
            "product": "USDA National Agriculture Imagery Program (NAIP)",
            "dataset": {
                "Imagery - 1 meter (NAIP)": (1.0 / 3600 / 27, 1, 3)
            },
            "subset": {},
            "extent": [
                "3.75 x 3.75 minute",
            ],
            "format": "JPEG2000",
            "extension": "jp2",
            "zip": False,
            "srs": "wgs84",
            "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs",
            "interpolation": "nearest",
            "url_split": "/",
        },
        "lidar": {
            "product": "Lidar Point Cloud (LPC)",
            "dataset": {
                "Lidar Point Cloud (LPC)": (1.0 / 3600 / 9, 3, 10)
            },
            "subset": {},
            "extent": [""],
            "format": "LAS,LAZ",
            "extension": "las,laz",
            "zip": True,
            "srs": "",
            "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs",
            "interpolation": "nearest",
            "url_split": "/",
        },
    }

    # Set GRASS GUI options and flags to python variables
    gui_product = options["product"]

    # Variable assigned from USGS product dictionary
    nav_string = usgs_product_dict[gui_product]
    product = nav_string["product"]
    product_format = nav_string["format"]
    product_extensions = tuple(nav_string["extension"].split(","))
    product_is_zip = nav_string["zip"]
    product_srs = nav_string["srs"]
    product_proj4 = nav_string["srs_proj4"]
    product_interpolation = nav_string["interpolation"]
    product_url_split = nav_string["url_split"]
    product_extent = nav_string["extent"]
    gui_subset = None

    # Parameter assignments for each dataset
    if gui_product == "ned":
        gui_dataset = options["ned_dataset"]
        ned_api_name = ""
        if options["ned_dataset"] == "ned1sec":
            ned_data_abbrv = "ned_1arc_"
            ned_api_name = "1 arc-second"
        if options["ned_dataset"] == "ned13sec":
            ned_data_abbrv = "ned_13arc_"
            ned_api_name = "1/3 arc-second"
        if options["ned_dataset"] == "ned19sec":
            ned_data_abbrv = "ned_19arc_"
            ned_api_name = "1/9 arc-second"
        product_tag = product + " " + ned_api_name

    if gui_product == "nlcd":
        gui_dataset = options["nlcd_dataset"]
        if options["nlcd_dataset"] == "nlcd2001":
            gui_dataset = "National Land Cover Database (NLCD) - 2001"
        if options["nlcd_dataset"] == "nlcd2006":
            gui_dataset = "National Land Cover Database (NLCD) - 2006"
        if options["nlcd_dataset"] == "nlcd2011":
            gui_dataset = "National Land Cover Database (NLCD) - 2011"

        if options["nlcd_subset"] == "landcover":
            gui_subset = "Land Cover"
        if options["nlcd_subset"] == "impervious":
            gui_subset = "Percent Developed Imperviousness"
        if options["nlcd_subset"] == "canopy":
            gui_subset = "Percent Tree Canopy"
        product_tag = gui_dataset

    if gui_product == "naip":
        gui_dataset = "Imagery - 1 meter (NAIP)"
        product_tag = nav_string["product"]

    has_pdal = gscript.find_program(pgm="v.in.pdal")
    if gui_product == "lidar":
        gui_dataset = "Lidar Point Cloud (LPC)"
        product_tag = nav_string["product"]
        if not has_pdal:
            gscript.warning(
                _("Module v.in.pdal is missing,"
                  " any downloaded data will not be processed."))
    # Assigning further parameters from GUI
    gui_output_layer = options["output_name"]
    gui_resampling_method = options["resampling_method"]
    gui_i_flag = flags["i"]
    gui_k_flag = flags["k"]
    work_dir = options["output_directory"]
    memory = options["memory"]
    nprocs = options["nprocs"]

    preserve_extracted_files = gui_k_flag
    use_existing_extracted_files = True
    preserve_imported_tiles = gui_k_flag
    use_existing_imported_tiles = True

    if not os.path.isdir(work_dir):
        gscript.fatal(
            _("Directory <{}> does not exist."
              " Please create it.").format(work_dir))

    # Returns current units
    try:
        proj = gscript.parse_command("g.proj", flags="g")
        if gscript.locn_is_latlong():
            product_resolution = nav_string["dataset"][gui_dataset][0]
        elif float(proj["meters"]) == 1:
            product_resolution = nav_string["dataset"][gui_dataset][1]
        else:
            # we assume feet
            product_resolution = nav_string["dataset"][gui_dataset][2]
    except TypeError:
        product_resolution = False
    if gui_product == "lidar" and options["resolution"]:
        product_resolution = float(options["resolution"])

    if gui_resampling_method == "default":
        gui_resampling_method = nav_string["interpolation"]
        gscript.verbose(
            _("The default resampling method for product {product} is {res}").
            format(product=gui_product, res=product_interpolation))

    # Get coordinates for current GRASS computational region and convert to USGS SRS
    gregion = gscript.region()
    wgs84 = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
    min_coords = gscript.read_command(
        "m.proj",
        coordinates=(gregion["w"], gregion["s"]),
        proj_out=wgs84,
        separator="comma",
        flags="d",
    )
    max_coords = gscript.read_command(
        "m.proj",
        coordinates=(gregion["e"], gregion["n"]),
        proj_out=wgs84,
        separator="comma",
        flags="d",
    )
    min_list = min_coords.split(",")[:2]
    max_list = max_coords.split(",")[:2]
    list_bbox = min_list + max_list
    str_bbox = ",".join((str(coord) for coord in list_bbox))

    # Format variables for TNM API call
    gui_prod_str = str(product_tag)
    datasets = quote_plus(gui_prod_str)
    prod_format = quote_plus(product_format)
    prod_extent = quote_plus(product_extent[0])

    # Create TNM API URL
    base_TNM = "https://tnmaccess.nationalmap.gov/api/v1/products?"
    datasets_TNM = "datasets={0}".format(datasets)
    bbox_TNM = "&bbox={0}".format(str_bbox)
    prod_format_TNM = "&prodFormats={0}".format(prod_format)
    TNM_API_URL = base_TNM + datasets_TNM + bbox_TNM + prod_format_TNM
    if gui_product == "nlcd":
        TNM_API_URL += "&prodExtents={0}".format(prod_extent)
    gscript.verbose("TNM API Query URL:\t{0}".format(TNM_API_URL))

    # Query TNM API
    try_again_messge = _(
        "Possibly, the query has timed out. Check network configuration and try again."
    )
    try:
        TNM_API_GET = urlopen(TNM_API_URL, timeout=12)
    except HTTPError as error:
        gscript.fatal(
            _("HTTP(S) error from USGS TNM API:"
              " {code}: {reason} ({instructions})").format(
                  reason=error.reason,
                  code=error.code,
                  instructions=try_again_messge))
    except (URLError, OSError, IOError) as error:
        # Catching also SSLError and potentially others which are
        # subclasses of IOError in Python 2 and of OSError in Python 3.
        gscript.fatal(
            _("Error accessing USGS TNM API: {error} ({instructions})").format(
                error=error, instructions=try_again_messge))

    # Parse return JSON object from API query
    try:
        return_JSON = json.load(TNM_API_GET)
        if return_JSON["errors"]:
            TNM_API_error = return_JSON["errors"]
            api_error_msg = "TNM API Error - {0}".format(str(TNM_API_error))
            gscript.fatal(api_error_msg)
        if gui_product == "lidar" and options["title_filter"]:
            return_JSON["items"] = [
                item for item in return_JSON["items"]
                if options["title_filter"] in item["title"]
            ]
            return_JSON["total"] = len(return_JSON["items"])

    except:
        gscript.fatal(_("Unable to load USGS JSON object."))

    # Functions down_list() and exist_list() are used to determine
    # which files exist locally and which still need to be downloaded.
    def down_list():
        dwnld_url.append(TNM_file_URL)
        dwnld_size.append(TNM_file_size)
        TNM_file_titles.append(TNM_file_title)
        if product_is_zip:
            extract_zip_list.append(local_zip_path)

    def exist_list():
        exist_TNM_titles.append(TNM_file_title)
        exist_dwnld_url.append(TNM_file_URL)
        if product_is_zip:
            exist_zip_list.append(local_zip_path)
            extract_zip_list.append(local_zip_path)
        else:
            exist_tile_list.append(local_tile_path)

    # Assign needed parameters from returned JSON
    tile_API_count = int(return_JSON["total"])
    tiles_needed_count = 0
    # TODO: Make the tolerance configurable.
    # Some combinations produce >10 byte differences.
    size_diff_tolerance = 5
    exist_dwnld_size = 0
    if tile_API_count > 0:
        dwnld_size = []
        dwnld_url = []
        TNM_file_titles = []
        exist_dwnld_url = []
        exist_TNM_titles = []
        exist_zip_list = []
        exist_tile_list = []
        extract_zip_list = []
        # for each file returned, assign variables to needed parameters
        for f in return_JSON["items"]:
            TNM_file_title = f["title"]
            TNM_file_URL = str(f["downloadURL"])
            TNM_file_size = int(f["sizeInBytes"])
            TNM_file_name = TNM_file_URL.split(product_url_split)[-1]
            if gui_product == "ned":
                local_file_path = os.path.join(work_dir,
                                               ned_data_abbrv + TNM_file_name)
                local_zip_path = os.path.join(work_dir,
                                              ned_data_abbrv + TNM_file_name)
                local_tile_path = os.path.join(work_dir,
                                               ned_data_abbrv + TNM_file_name)
            else:
                local_file_path = os.path.join(work_dir, TNM_file_name)
                local_zip_path = os.path.join(work_dir, TNM_file_name)
                local_tile_path = os.path.join(work_dir, TNM_file_name)
            file_exists = os.path.exists(local_file_path)
            file_complete = None
            # If the file exists, do not download it again,
            # but if it is incomplete (e.g. an interrupted download), redownload it.
            if file_exists:
                existing_local_file_size = os.path.getsize(local_file_path)
                # if local file is incomplete
                if abs(existing_local_file_size -
                       TNM_file_size) > size_diff_tolerance:
                    gscript.verbose(
                        _("Size of local file {filename} ({local_size}) differs"
                          " from a file size specified in the API ({api_size})"
                          " by {difference} bytes"
                          " which is more than tolerance ({tolerance})."
                          " It will be downloaded again.").format(
                              filename=local_file_path,
                              local_size=existing_local_file_size,
                              api_size=TNM_file_size,
                              difference=abs(existing_local_file_size -
                                             TNM_file_size),
                              tolerance=size_diff_tolerance,
                          ))
                    # NLCD API query returns subsets that cannot be filtered before
                    # results are returned. gui_subset is used to filter results.
                    if not gui_subset:
                        tiles_needed_count += 1
                        down_list()
                    else:
                        if gui_subset in TNM_file_title:
                            tiles_needed_count += 1
                            down_list()
                        else:
                            continue
                else:
                    if not gui_subset:
                        tiles_needed_count += 1
                        exist_list()
                        exist_dwnld_size += TNM_file_size
                    else:
                        if gui_subset in TNM_file_title:
                            tiles_needed_count += 1
                            exist_list()
                            exist_dwnld_size += TNM_file_size
                        else:
                            continue
            else:
                if not gui_subset:
                    tiles_needed_count += 1
                    down_list()
                else:
                    if gui_subset in TNM_file_title:
                        tiles_needed_count += 1
                        down_list()
                        continue

    # return fatal error if API query returns no results for GUI input
    elif tile_API_count == 0:
        gscript.fatal(
            _("TNM API ERROR or Zero tiles available for given input parameters."
              ))

    # number of files to be downloaded
    file_download_count = len(dwnld_url)

    # remove existing files from download lists
    for t in exist_TNM_titles:
        if t in TNM_file_titles:
            TNM_file_titles.remove(t)
    for url in exist_dwnld_url:
        if url in dwnld_url:
            dwnld_url.remove(url)

    # messages to user about status of files to be kept, removed, or downloaded
    if exist_zip_list:
        exist_msg = _(
            "\n{0} of {1} files/archive(s) exist locally and will be used by module."
        ).format(len(exist_zip_list), tiles_needed_count)
        gscript.message(exist_msg)
    # TODO: fix this way of reporting and merge it with the one in use
    if exist_tile_list:
        exist_msg = _(
            "\n{0} of {1} files/archive(s) exist locally and will be used by module."
        ).format(len(exist_tile_list), tiles_needed_count)
        gscript.message(exist_msg)

    # format the JSON-reported sizes from bytes into readable units for the combined download size
    if dwnld_size:
        total_size = sum(dwnld_size)
        len_total_size = len(str(total_size))
        # default to plain bytes so total_size_str is always defined
        total_size_str = str(total_size) + " B"
        if 6 < len_total_size < 10:
            total_size_float = total_size * 1e-6
            total_size_str = str("{0:.2f}".format(total_size_float) + " MB")
        if len_total_size >= 10:
            total_size_float = total_size * 1e-9
            total_size_str = str("{0:.2f}".format(total_size_float) + " GB")
    else:
        total_size_str = "0"

    # Prints 'none' if all tiles available locally
    if TNM_file_titles:
        TNM_file_titles_info = "\n".join(TNM_file_titles)
    else:
        TNM_file_titles_info = "none"

    # Formatted return for 'i' flag
    if file_download_count <= 0:
        data_info = "USGS file(s) to download: NONE"
        if gui_product == "nlcd":
            if tile_API_count != file_download_count:
                if tiles_needed_count == 0:
                    nlcd_unavailable = (
                        "NLCD {0} data unavailable for input parameters".
                        format(gui_subset))
                    gscript.fatal(nlcd_unavailable)
    else:
        data_info = (
            "USGS file(s) to download:",
            "-------------------------",
            "Total download size:\t{size}",
            "Tile count:\t{count}",
            "USGS SRS:\t{srs}",
            "USGS tile titles:\n{tile}",
            "-------------------------",
        )
        data_info = "\n".join(data_info).format(
            size=total_size_str,
            count=file_download_count,
            srs=product_srs,
            tile=TNM_file_titles_info,
        )
    print(data_info)

    if gui_i_flag:
        gscript.info(
            _("To download USGS data, remove <i> flag, and rerun r.in.usgs."))
        sys.exit()

    # USGS data download process
    if file_download_count <= 0:
        gscript.message(_("Extracting existing USGS Data..."))
    else:
        gscript.message(_("Downloading USGS Data..."))

    TNM_count = len(dwnld_url)
    download_count = 0
    local_tile_path_list = []
    local_zip_path_list = []
    patch_names = []

    # Download files
    for url in dwnld_url:
        # create file name by splitting name from returned url
        # add file name to local download directory
        if gui_product == "ned":
            file_name = ned_data_abbrv + url.split(product_url_split)[-1]
            local_file_path = os.path.join(work_dir, file_name)
        else:
            file_name = url.split(product_url_split)[-1]
            local_file_path = os.path.join(work_dir, file_name)
        try:
            # download files in chunks rather than write complete files to memory
            dwnld_req = urlopen(url, timeout=12)
            download_bytes = int(dwnld_req.info()["Content-Length"])
            CHUNK = 16 * 1024
            with open(local_file_path, "wb+") as local_file:
                count = 0
                steps = int(download_bytes / CHUNK) + 1
                while True:
                    chunk = dwnld_req.read(CHUNK)
                    gscript.percent(count, steps, 10)
                    count += 1
                    if not chunk:
                        break
                    local_file.write(chunk)
                gscript.percent(1, 1, 1)
            local_file.close()
            download_count += 1
            # determine if file is a zip archive or another format
            if product_is_zip:
                local_zip_path_list.append(local_file_path)
            else:
                local_tile_path_list.append(local_file_path)
            file_complete = "Download {0} of {1}: COMPLETE".format(
                download_count, TNM_count)
            gscript.info(file_complete)
        except URLError:
            gscript.fatal(
                _("USGS download request has timed out. Network or formatting error."
                  ))
        except StandardError:
            cleanup_list.append(local_file_path)
            if download_count:
                file_failed = "Download {0} of {1}: FAILED".format(
                    download_count, TNM_count)
                gscript.fatal(file_failed)

    # queue already downloaded zip files or tiles for extraction or import
    # our pre-extraction stats are unreliable, so statistics are collected during extraction
    used_existing_extracted_tiles_num = 0
    removed_extracted_tiles_num = 0
    old_extracted_tiles_num = 0
    extracted_tiles_num = 0
    if exist_zip_list:
        for z in exist_zip_list:
            local_zip_path_list.append(z)
    if exist_tile_list:
        for t in exist_tile_list:
            local_tile_path_list.append(t)
    if product_is_zip:
        if file_download_count == 0:
            pass
        else:
            gscript.message("Extracting data...")
        # for each zip archive, extract needed file
        files_to_process = len(local_zip_path_list)
        for i, z in enumerate(local_zip_path_list):
            # TODO: measure only for the files being unzipped
            gscript.percent(i, files_to_process, 10)
            # Extract tiles from ZIP archives
            try:
                with zipfile.ZipFile(z, "r") as read_zip:
                    for f in read_zip.namelist():
                        if f.lower().endswith(product_extensions):
                            extracted_tile = os.path.join(work_dir, str(f))
                            remove_and_extract = True
                            if os.path.exists(extracted_tile):
                                if use_existing_extracted_files:
                                    # if the downloaded file is newer
                                    # than the extracted one, we extract
                                    if os.path.getmtime(
                                            extracted_tile) < os.path.getmtime(
                                                z):
                                        remove_and_extract = True
                                        old_extracted_tiles_num += 1
                                    else:
                                        remove_and_extract = False
                                        used_existing_extracted_tiles_num += 1
                                else:
                                    remove_and_extract = True
                                if remove_and_extract:
                                    removed_extracted_tiles_num += 1
                                    os.remove(extracted_tile)
                            if remove_and_extract:
                                extracted_tiles_num += 1
                                read_zip.extract(f, work_dir)
                if os.path.exists(extracted_tile):
                    local_tile_path_list.append(extracted_tile)
                    if not preserve_extracted_files:
                        cleanup_list.append(extracted_tile)
            except IOError as error:
                cleanup_list.append(extracted_tile)
                gscript.fatal(
                    _("Unable to locate or extract IMG file '{filename}'"
                      " from ZIP archive '{zipname}': {error}").format(
                          filename=extracted_tile, zipname=z, error=error))
        gscript.percent(1, 1, 1)
        # TODO: do this before the extraction begins
        gscript.verbose(
            _("Extracted {extracted} new tiles and"
              " used {used} existing tiles").format(
                  used=used_existing_extracted_tiles_num,
                  extracted=extracted_tiles_num))
        if old_extracted_tiles_num:
            gscript.verbose(
                _("Found {removed} existing tiles older"
                  " than the corresponding downloaded archive").format(
                      removed=old_extracted_tiles_num))
        if removed_extracted_tiles_num:
            gscript.verbose(
                _("Removed {removed} existing tiles").format(
                    removed=removed_extracted_tiles_num))

    if gui_product == "lidar" and not has_pdal:
        gscript.fatal(
            _("Module v.in.pdal is missing,"
              " cannot process downloaded data."))

    # operations for extracted or complete files available locally
    # We are looking only for the existing maps in the current mapset,
    # but theoretically we could be getting them from other mapsets
    # on search path or from the whole location. User may also want to
    # store the individual tiles in a separate mapset.
    # The big assumption here is the naming of the maps (it is a smaller
    # concern for the files in a dedicated download directory).
    used_existing_imported_tiles_num = 0
    imported_tiles_num = 0
    mapset = get_current_mapset()
    files_to_import = len(local_tile_path_list)

    process_list = []
    process_id_list = []
    process_count = 0
    num_tiles = len(local_tile_path_list)

    with Manager() as manager:
        results = manager.dict()
        for i, t in enumerate(local_tile_path_list):
            # create variables for use in GRASS GIS import process
            LT_file_name = os.path.basename(t)
            LT_layer_name = os.path.splitext(LT_file_name)[0]
            # we are removing the files if requested even if we don't use them
            # do not remove by default with NAIP, there are no zip files
            if gui_product != "naip" and not preserve_extracted_files:
                cleanup_list.append(t)
            # TODO: unlike the files, we don't compare date with input
            if use_existing_imported_tiles and map_exists(
                    "raster", LT_layer_name, mapset):
                patch_names.append(LT_layer_name)
                used_existing_imported_tiles_num += 1
            else:
                in_info = _("Importing and reprojecting {name}"
                            " ({count} out of {total})...").format(
                                name=LT_file_name,
                                count=i + 1,
                                total=files_to_import)
                gscript.info(in_info)

                process_count += 1
                if gui_product != "lidar":
                    process = Process(
                        name="Import-{}-{}-{}".format(process_count, i,
                                                      LT_layer_name),
                        target=run_file_import,
                        kwargs=dict(
                            identifier=i,
                            results=results,
                            input=t,
                            output=LT_layer_name,
                            resolution="value",
                            resolution_value=product_resolution,
                            extent="region",
                            resample=product_interpolation,
                            memory=memory,
                        ),
                    )
                else:
                    srs = options["input_srs"]
                    process = Process(
                        name="Import-{}-{}-{}".format(process_count, i,
                                                      LT_layer_name),
                        target=run_lidar_import,
                        kwargs=dict(
                            identifier=i,
                            results=results,
                            input=t,
                            output=LT_layer_name,
                            input_srs=srs if srs else None,
                        ),
                    )
                process.start()
                process_list.append(process)
                process_id_list.append(i)

            # Wait for processes to finish when we reached the max number
            # of processes.
            if process_count == nprocs or i == num_tiles - 1:
                exitcodes = 0
                for process in process_list:
                    process.join()
                    exitcodes += process.exitcode
                if exitcodes != 0:
                    if nprocs > 1:
                        gscript.fatal(
                            _("Parallel import and reprojection failed."
                              " Try running with nprocs=1."))
                    else:
                        gscript.fatal(
                            _("Import and reprojection step failed."))
                for identifier in process_id_list:
                    if "errors" in results[identifier]:
                        gscript.warning(results[identifier]["errors"])
                    else:
                        patch_names.append(results[identifier]["output"])
                        imported_tiles_num += 1
                # Empty the process list
                process_list = []
                process_id_list = []
                process_count = 0
        # no process should be left now
        assert not process_list
        assert not process_id_list
        assert not process_count

    gscript.verbose(
        _("Imported {imported} new tiles and"
          " used {used} existing tiles").format(
              used=used_existing_imported_tiles_num,
              imported=imported_tiles_num))

    # if control variables match and multiple files need to be patched,
    # check product resolution, run r.patch

    # v.surf.rst lidar params
    rst_params = dict(tension=25, smooth=0.1, npmin=100)

    # Check that downloaded files match expected count
    completed_tiles_count = len(local_tile_path_list)
    if completed_tiles_count == tiles_needed_count:
        if len(patch_names) > 1:
            try:
                gscript.use_temp_region()
                # set the resolution
                if product_resolution:
                    gscript.run_command("g.region",
                                        res=product_resolution,
                                        flags="a")
                if gui_product == "naip":
                    for i in ("1", "2", "3", "4"):
                        patch_names_i = [
                            name + "." + i for name in patch_names
                        ]
                        output = gui_output_layer + "." + i
                        gscript.run_command("r.patch",
                                            input=patch_names_i,
                                            output=output)
                        gscript.raster_history(output)
                elif gui_product == "lidar":
                    gscript.run_command(
                        "v.patch",
                        flags="nzb",
                        input=patch_names,
                        output=gui_output_layer,
                    )
                    gscript.run_command("v.surf.rst",
                                        input=gui_output_layer,
                                        elevation=gui_output_layer,
                                        nprocs=nprocs,
                                        **rst_params)
                else:
                    gscript.run_command("r.patch",
                                        input=patch_names,
                                        output=gui_output_layer)
                    gscript.raster_history(gui_output_layer)
                gscript.del_temp_region()
                out_info = ("Patched composite layer '{0}' added"
                            ).format(gui_output_layer)
                gscript.verbose(out_info)
                # Remove files if not -k flag
                if not preserve_imported_tiles:
                    if gui_product == "naip":
                        for i in ("1", "2", "3", "4"):
                            patch_names_i = [
                                name + "." + i for name in patch_names
                            ]
                            gscript.run_command("g.remove",
                                                type="raster",
                                                name=patch_names_i,
                                                flags="f")
                    elif gui_product == "lidar":
                        gscript.run_command(
                            "g.remove",
                            type="vector",
                            name=patch_names + [gui_output_layer],
                            flags="f",
                        )
                    else:
                        gscript.run_command("g.remove",
                                            type="raster",
                                            name=patch_names,
                                            flags="f")
            except CalledModuleError:
                gscript.fatal("Unable to patch tiles.")
            temp_down_count = _(
                "{0} of {1} tiles successfully imported and patched").format(
                    completed_tiles_count, tiles_needed_count)
            gscript.info(temp_down_count)
        elif len(patch_names) == 1:
            if gui_product == "naip":
                for i in ("1", "2", "3", "4"):
                    gscript.run_command(
                        "g.rename",
                        raster=(patch_names[0] + "." + i,
                                gui_output_layer + "." + i),
                    )
            elif gui_product == "lidar":
                if product_resolution:
                    gscript.run_command("g.region",
                                        res=product_resolution,
                                        flags="a")
                gscript.run_command("v.surf.rst",
                                    input=patch_names[0],
                                    elevation=gui_output_layer,
                                    nprocs=nprocs,
                                    **rst_params)
                if not preserve_imported_tiles:
                    gscript.run_command("g.remove",
                                        type="vector",
                                        name=patch_names[0],
                                        flags="f")
            else:
                gscript.run_command("g.rename",
                                    raster=(patch_names[0], gui_output_layer))
            temp_down_count = _("Tile successfully imported")
            gscript.info(temp_down_count)
        else:
            gscript.fatal(
                _("No tiles imported successfully. Nothing to patch."))
    else:
        gscript.fatal(
            _("Error in getting or importing the data (see above). Please retry."
              ))

    # Keep source files if 'k' flag active
    if gui_k_flag:
        src_msg = (
            "<k> flag selected: Source tiles remain in '{0}'").format(work_dir)
        gscript.info(src_msg)

    # set appropriate color table
    if gui_product == "ned":
        gscript.run_command("r.colors",
                            map=gui_output_layer,
                            color="elevation")

    # composite NAIP
    if gui_product == "naip":
        gscript.use_temp_region()
        gscript.run_command("g.region", raster=gui_output_layer + ".1")
        gscript.run_command(
            "r.composite",
            red=gui_output_layer + ".1",
            green=gui_output_layer + ".2",
            blue=gui_output_layer + ".3",
            output=gui_output_layer,
        )
        gscript.raster_history(gui_output_layer)
        gscript.del_temp_region()
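The import loop above batches Process workers and collects their outcomes in a Manager().dict() keyed by tile identifier. A minimal, self-contained sketch of that pattern follows; work() is a hypothetical stand-in for run_file_import/run_lidar_import and only mirrors the {"output": ...} / {"errors": ...} contract that is consumed after join().

from multiprocessing import Manager, Process


def work(identifier, results, value):
    # Hypothetical worker: report either an "output" or an "errors" entry,
    # mirroring how the results dict is consumed after join() above.
    try:
        results[identifier] = {"output": value * value}
    except Exception as error:
        results[identifier] = {"errors": str(error)}


def run_batched(items, nprocs=2):
    outputs = []
    with Manager() as manager:
        results = manager.dict()
        process_list = []
        id_list = []
        for i, item in enumerate(items):
            process = Process(target=work, args=(i, results, item))
            process.start()
            process_list.append(process)
            id_list.append(i)
            # Join whenever the batch is full or this is the last item.
            if len(process_list) == nprocs or i == len(items) - 1:
                for process in process_list:
                    process.join()
                for identifier in id_list:
                    if "errors" in results[identifier]:
                        print("worker failed:", results[identifier]["errors"])
                    else:
                        outputs.append(results[identifier]["output"])
                process_list = []
                id_list = []
    return outputs


if __name__ == "__main__":
    print(run_batched([1, 2, 3, 4, 5], nprocs=2))  # [1, 4, 9, 16, 25]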
 createDatabase(aggregatedAndImputedDatebaseName)
 dataset = getAllRecordsFromDatabase(databaseName)
 lengthOfDataset = len(dataset)
 dataGroupCount = int(lengthOfDataset / cpuCoreCount)
 dataGroupCollection = list()
 for i in range(cpuCoreCount):
     if i != cpuCoreCount - 1:
         subDataset = dataset[i * dataGroupCount:(i + 1) *
                              dataGroupCount, :]
         dataGroupCollection.append(subDataset)
     else:
         subDataset = dataset[i * dataGroupCount:, :]
         dataGroupCollection.append(subDataset)
 processDictionary = dict()
 resultFromProcessesDictionary = dict()
 with Manager() as manager:
     for i in range(cpuCoreCount):
         resultFromProcessesDictionary[i] = manager.list()
         resultList = resultFromProcessesDictionary.get(i)
         processDictionary[i] = Process(target=processSubDataset,
                                        args=(dataGroupCollection[i],
                                              resultList))
     for i in range(cpuCoreCount):
         processDictionary.get(i).start()
     for i in range(cpuCoreCount):
         processDictionary.get(i).join()
     datasetFromProcesses = list()
     for i in range(cpuCoreCount):
         datasetFromProcesses.extend(
             resultFromProcessesDictionary.get(i))
     print(len(datasetFromProcesses))
def pipeline_STEP1(cfg, logger, cpu_number):
    s = "Working with STEP 1: running MAFFT"
    print(s)
    logger.info(s)
    input_folder = cfg["input_folder"]
    output_folder = cfg["output_folder"]
    out_suffix = cfg["output_suffix"]
    create_result_folder(output_folder, logger)
    os.chdir(input_folder)
    unwanted_files = [
        x for x in os.listdir(os.getcwd()) if "_temp_aligned.fasta" in x
    ]

    # Remove files from a previously aborted run.
    for f in unwanted_files:
        os.remove(f)
    finished_files = [
        x.split(out_suffix)[0] for x in os.listdir(output_folder)
    ]
    starting_files = sorted([
        x for x in os.listdir(os.getcwd()) if cfg["input_suffix"] in x
        and find_n_seqs(x, 1, cfg["upper_sequence_limit"]) and "_core" not in x
        and "_addit" not in x and x.split(out_suffix)[0] not in finished_files
    ])

    for fasta in starting_files:
        is_fasta(fasta)

    # Size threshold for switching from small_fastas alignment to large_fastas
    size_threshold = cfg["MAFFT_upper_limit_addfragments"]
    small_fastas = [
        x for x in starting_files if find_n_seqs(x, 1, size_threshold + 1)
    ]
    logger.debug("there are {} small_fastas".format(len(small_fastas)))

    large_fastas = [
        x for x in starting_files if not find_n_seqs(x, 1, size_threshold + 1)
    ]
    logger.debug("there are {} large_fastas".format(len(large_fastas)))

    if small_fastas:
        manager = Manager()
        fastas = manager.Queue()
        result_dict = manager.dict()
        logger.info("Running Mafft on small fasta")
        process_future_fasta(
            STEP1.run_MAFFT_small,
            small_fastas,
            result_dict,
            fastas,
            cpu_number,
            logger,
            cfg,
            tqdm_desc="Mafft on small files",
        )
    if large_fastas:
        manager = Manager()
        fastas = manager.Queue()
        result_dict = manager.dict()
        logger.info("Running Mafft on large fasta")
        process_future_fasta(
            STEP1.run_MAFFT_large,
            large_fastas,
            result_dict,
            fastas,
            cpu_number,
            logger,
            cfg,
            tqdm_desc="Mafft on large files",
        )
    logger.info("STEP 1 finished")
Beispiel #34
def GSO(bounds,
        num_particles,
        max_iter,
        classifier,
        train_data,
        epochs,
        batch_size,
        mini_batch_size=None):
    """
    Galactic Swarm Optimization:
    ----------------------------
    A meta-heuristic algorithm insipred by the interplay
    of stars, galaxies and superclusters under the influence
    of gravity.
    
    Input:
    ------
    M: integer 
    number of galaxies
    bounds: 
    bounds of the search space across each dimension
    [lower_bound, upper_bound] * dims
    We specify only lower_bound and upper_bound
    
    """
    subswarm_bests = []
    dims = sum([
        np.prod(np.array(layer['weights']).shape)
        for layer in classifier.layers.values()
    ])
    print("total number of weights -", dims)
    lb = bounds[0]
    ub = bounds[1]
    # lets set bounds across all dims
    bounds = [[lb, ub]] * dims
    manager = Manager()
    l = Lock()
    # keep the global best ([position, error]) in a managed list so that the
    # PSO worker processes can read and update it
    shared_list = manager.list([np.random.uniform(lb, ub, dims), np.inf])
    return_list = manager.list()
    all_processes = []
    #pso_batch_size = train_data[0].shape[0]//M
    g_best_weights = None
    g_best_error = float("inf")
    X_train, y_train = train_data
    if not mini_batch_size:
        mini_batch_size = X_train.shape[0]

    # one classifier copy per subswarm
    classifiers = [
        copy.deepcopy(classifier) for _ in range(mini_batch_size // batch_size)
    ]

    print('starting with gso_batch size - {}, mini_batch_size - {}'.format(
        batch_size, mini_batch_size))

    # create N particles here
    swarm_inits = []
    for j in range(mini_batch_size // batch_size):
        swarm_init = []
        for _ in range(num_particles):
            swarm_init.append(np.random.uniform(lb, ub, (1, dims)))
        swarm_inits.append(swarm_init)

    for i in tqdm(range(epochs)):
        all_processes = []
        sampler = sample_data(X_train, y_train, batch_size, mini_batch_size)
        for j in range(mini_batch_size // batch_size):
            pso_train_data = next(sampler)

            #initial= np.random.uniform(-10,10, 2)               # initial starting location [x1,x2...]
            # swarm_init = []
            # for _ in range(num_particles):
            #     swarm_init.append(np.random.uniform(lb, ub, dims))

            #pso_train_data = (data[0][k*batch_size:(k+1)*pso_batch_size], data[1][k*batch_size:(k+1)*pso_batch_size])

            # print('started batch :',i)
            # print('train_data length :', len(pso_train_data))
            #print('shape of swarm_inits[j][0]: ', swarm_inits[j][0].shape)
            swarm_init = np.array(
                [item.reshape(dims, 1) for item in swarm_inits[j]])
            p = Process(target=PSO,
                        args=(classifiers[j], bounds, max_iter, shared_list,
                              return_list, l, None, swarm_init,
                              pso_train_data))
            all_processes.append(p)

        start(all_processes)
        stop(all_processes)
        #print('elements of return list: ', return_list)
        main_swarm_init = [item[0] for item in return_list]
        #swarm_inits = [item[1] for item in return_list]
        swarm_inits = [main_swarm_init for item in return_list]
        best_weights, best_error = PSO_purana(classifier,
                                              bounds,
                                              max_iter,
                                              swarm_init=main_swarm_init,
                                              train_data=train_data)

        if best_error < g_best_error:
            g_best_error = best_error
            g_best_weights = best_weights
        print('completed epoch {} --------> loss_value: {}'.format(
            i, best_error))

    prev_index = 0
    for layer_id, layer in classifier.layers.items():
        num_elements = np.prod(
            layer['weights'].shape
        )  # we can cache this and pass it down or store it as layer.num_elements
        new_weights = g_best_weights[prev_index:prev_index + num_elements]
        layer['weights'] = new_weights.reshape(
            layer['weights'].shape
        )  # changing value midway can cause some error
        prev_index += num_elements

    return classifier
Beispiel #35
class DataLogger(metaclass=Singleton):
    """Stores and save various type of data under various forms."""
    @staticmethod
    def _futures_callback(future: Future):
        """Called at future completion."""
        if future.exception():
            print(
                f"Future {future} raised the exception {repr(future.exception())}"
            )

    @staticmethod
    def _push(managed, entry, value, time):
        """Push method called by the pool executors"""
        with managed.lockers[entry]:
            managed.data[entry][time] = value
            managed.counters[entry] += 1
            for f in managed.on_push_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )

    @staticmethod
    def _dump(managed, entry):
        """Dump method called by the pool executors"""
        with managed.lockers[entry]:
            for f in managed.on_dump_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )

    @staticmethod
    def _reset(managed, entry):
        """Inner reset method called by the pool executor"""
        with managed.lockers[entry]:
            for f in managed.on_reset_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )
            managed.data[entry].clear()
            managed.counters[entry] = 0

    def __init__(self):
        # Init and set attributes
        super(DataLogger, self).__init__()
        # Managed resources (accessible by remote threads or remote processes)
        self._manager = Manager()
        self._managed = self._manager.Namespace()
        self._managed.name = "data-logger"
        self._managed.path = "."
        self._managed.entries = self._manager.list()
        self._managed.data = self._manager.dict()
        self._managed.lockers = self._manager.dict()
        self._managed.counters = self._manager.dict()
        self._managed.on_push_callables = self._manager.dict()
        self._managed.on_reset_callables = self._manager.dict()
        self._managed.on_dump_callables = self._manager.dict()

        self.tick = datetime.datetime.now()
        self.futures = list()
        self.pool = ThreadPoolExecutor(max_workers=1)
        # Log
        logging.getLogger("datalogger").info(
            "{} DataLogger initialized!".format(self._managed.name))

    def set_path(self, path):
        """Sets the root path of the logger. Used by all the handlers that write on disk.

        :param string path: A valid path to write the data in.
        """
        if len(self._managed.lockers) != 0:
            raise Exception(
                "You tried to change logger path after having registered some entries."
            )
        os.makedirs(path, exist_ok=True)
        self._managed.path = path

    def set_pool(self, pool, n_par=5):
        """Sets the executor to be used to call handlers.

        :param string pool: The type of executor to use to call handlers. Either "thread" or "process".
        :param int n_par: The number of executor to use.
        """
        if len(self._managed.lockers) != 0:
            raise Exception(
                "You tried to pool after having registered some entries.")
        if pool == "thread":
            self.pool = ThreadPoolExecutor(max_workers=n_par)
        elif pool == "process":
            self.pool = ProcessPoolExecutor(max_workers=n_par)
        else:
            raise Exception(f"Unknown pool type `{pool}`")

    def set_name(self, name):
        """Sets the name of the logger.

        :param string name: Name of the logger
        """
        self._managed.name = name

    def declare(self, entry, on_push_callables, on_dump_callables,
                on_reset_callables):
        """Register a recurring log entry.

        Registering an entry gives access to the `push`, `reset` and `dump` methods. Note that all the handlers must be
        able to handle the data that will be pushed.

        :param string entry: Name of the log entry.
        :param List[handlers] on_push_callables: Handlers called on data when `push` is called.
        :param List[handlers] on_reset_callables: Handlers called on data when `reset` is called.
        :param List[handlers] on_dump_callables: Handlers called on the data when `dump` is called.
        """
        if entry in self._managed.entries:
            raise Exception("You tried to declare an existing log entry")
        self._managed.entries.append(entry)
        self._managed.lockers[entry] = self._manager.RLock()
        self._managed.data[entry] = self._manager.dict()
        self._managed.counters[entry] = 0
        self._managed.on_push_callables[entry] = self._manager.list(
            on_push_callables)
        self._managed.on_reset_callables[entry] = self._manager.list(
            on_reset_callables)
        self._managed.on_dump_callables[entry] = self._manager.list(
            on_dump_callables)
        if os.path.dirname(entry) != "":
            os.makedirs(os.path.join(self._managed.path,
                                     os.path.dirname(entry)),
                        exist_ok=True)

    def push(self, entry, value, time=None):
        """Append data to a recurring log.

        All handlers registered for the `on_push` event will be called.

        :param string entry: Name of the log entry
        :param Any value: Object containing the data to log. Should be of same type from call to call...
        :param int or None time: Date of the logging (epoch, iteration, tic ...). Will be used as key in the data
        dictionary. If `None`, the last data key plus one will be used.
        """
        future = self.pool.submit(
            DataLogger._push, self._managed, entry, value,
            time if time is not None else self._managed.counters[entry])
        future.add_done_callback(DataLogger._futures_callback)
        self.futures.append(future)

    def dump(self):
        """Calls handlers declared for `on_dump` event, for all registered log entries.
        """
        for entry in self._managed.entries:
            future = self.pool.submit(DataLogger._dump, self._managed, entry)
            future.add_done_callback(DataLogger._futures_callback)
            self.futures.append(future)

    def reset(self, entry):
        """Resets the data of a recurring log entry.

        All handlers registered for the `on_reset` event will be called before the storage is emptied.

        :param string entry: name of the log entry.
        """

        future = self.pool.submit(DataLogger._reset, self._managed, entry)
        future.add_done_callback(DataLogger._futures_callback)
        self.futures.append(future)

    def get_entry_length(self, entry):
        """Retrieves the number of data saved for a log entry.

        :param string entry: Name of the log entry
        :return: Number of data pieces in the entry storage
        :rtype: int
        """
        return self._managed.counters[entry]

    def get_serie(self, entry):
        """Returns the data in a list ordered by keys.

        :param string entry: Name of the log entry
        :return: Serie of data ordered by key
        :rtype: List[any]
        """
        return [i[1] for i in sorted(self._managed.data[entry].items())]

    def wait(self, log_durations=True):
        """Wait for the handling queue to be emptied.

        :param bool log_durations: Whether to log the wait duration.
        """
        b = datetime.datetime.now()
        while True:
            self.futures = list(filter(lambda x: not x.done(), self.futures))
            if self.futures:
                time.sleep(.1)
            else:
                break
        if log_durations:
            logging.getLogger("datalogger").info(
                f"{self._managed.name} DataLogger: Last wait occurred {b - self.tick} ago."
            )
            logging.getLogger("datalogger").info(
                f"{self._managed.name} DataLogger: Waited {datetime.datetime.now() - b} for completion."
            )
        self.tick = datetime.datetime.now()
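A hedged usage sketch for the DataLogger above. It assumes the surrounding module provides the Singleton metaclass and the imports the class relies on; print_handler is a made-up handler following the f(entry, data, path=...) signature that _push/_dump/_reset expect.

def print_handler(entry, data, path="."):
    # handlers receive the entry name, its data mapping and the root path
    print(f"[{path}] {entry}: {dict(data)}")


logger = DataLogger()
logger.set_name("demo")
logger.set_path("./logs")
logger.declare(
    "train/loss",
    on_push_callables=[],               # nothing extra on every push
    on_dump_callables=[print_handler],  # print the whole serie on dump()
    on_reset_callables=[],
)
logger.push("train/loss", 0.93, time=0)
logger.push("train/loss", 0.71, time=1)
logger.wait(log_durations=False)        # let the executor process the pushes
logger.dump()
logger.wait(log_durations=False)
print(logger.get_serie("train/loss"))   # [0.93, 0.71]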
Beispiel #36
def build_opts(opts):
    """Trigger a new process that builds the workflow graph, based on the input options."""
    import os
    from pathlib import Path
    import logging
    import sys
    import gc
    import warnings
    from multiprocessing import set_start_method, Process, Manager
    from nipype import logging as nlogging
    from niworkflows.utils.misc import check_valid_fs_license

    set_start_method("forkserver")

    logging.addLevelName(
        25, "IMPORTANT")  # Add a new level between INFO and WARNING
    logging.addLevelName(15,
                         "VERBOSE")  # Add a new level between INFO and DEBUG
    logger = logging.getLogger("cli")

    def _warn_redirect(message,
                       category,
                       filename,
                       lineno,
                       file=None,
                       line=None):
        logger.warning("Captured warning (%s): %s", category, message)

    warnings.showwarning = _warn_redirect

    # Precedence: --fs-license-file, $FS_LICENSE, default_license
    if opts.fs_license_file is not None:
        os.environ["FS_LICENSE"] = os.path.abspath(opts.fs_license_file)

    if not check_valid_fs_license():
        raise RuntimeError(
            "ERROR: a valid license file is required for FreeSurfer to run. "
            "sMRIPrep looked for an existing license file at several paths, in this "
            "order: 1) command line argument ``--fs-license-file``; 2) ``$FS_LICENSE`` "
            "environment variable; and 3) the ``$FREESURFER_HOME/license.txt`` path. "
            "Get it (for free) by registering at https://"
            "surfer.nmr.mgh.harvard.edu/registration.html")

    # Retrieve logging level
    log_level = int(max(25 - 5 * opts.verbose_count, logging.DEBUG))
    # Set logging
    logger.setLevel(log_level)
    nlogging.getLogger("nipype.workflow").setLevel(log_level)
    nlogging.getLogger("nipype.interface").setLevel(log_level)
    nlogging.getLogger("nipype.utils").setLevel(log_level)

    errno = 0

    # Call build_workflow(opts, retval)
    with Manager() as mgr:
        retval = mgr.dict()
        p = Process(target=build_workflow, args=(opts, retval))
        p.start()
        p.join()

        if p.exitcode != 0:
            sys.exit(p.exitcode)

        smriprep_wf = retval["workflow"]
        plugin_settings = retval["plugin_settings"]
        bids_dir = retval["bids_dir"]
        output_dir = retval["output_dir"]
        subject_list = retval["subject_list"]
        run_uuid = retval["run_uuid"]
        retcode = retval["return_code"]

    if smriprep_wf is None:
        sys.exit(1)

    if opts.write_graph:
        smriprep_wf.write_graph(graph2use="colored",
                                format="svg",
                                simple_form=True)

    if opts.reports_only:
        sys.exit(int(retcode > 0))

    if opts.boilerplate:
        sys.exit(int(retcode > 0))

    # Check workflow for missing commands
    missing = check_deps(smriprep_wf)
    if missing:
        print("Cannot run sMRIPrep. Missing dependencies:")
        for iface, cmd in missing:
            print("\t{} (Interface: {})".format(cmd, iface))
        sys.exit(2)

    # Clean up master process before running workflow, which may create forks
    gc.collect()
    try:
        smriprep_wf.run(**plugin_settings)
    except RuntimeError:
        errno = 1
    else:
        if opts.run_reconall:
            from templateflow import api
            from niworkflows.utils.misc import _copy_any

            dseg_tsv = str(
                api.get("fsaverage", suffix="dseg", extension=[".tsv"]))
            _copy_any(
                dseg_tsv,
                str(Path(output_dir) / "smriprep" / "desc-aseg_dseg.tsv"))
            _copy_any(
                dseg_tsv,
                str(Path(output_dir) / "smriprep" / "desc-aparcaseg_dseg.tsv"))
        logger.log(25, "sMRIPrep finished without errors")
    finally:
        from niworkflows.reports import generate_reports
        from ..utils.bids import write_derivative_description, write_bidsignore

        logger.log(25, "Writing reports for participants: %s",
                   ", ".join(subject_list))
        # Generate reports phase
        errno += generate_reports(subject_list,
                                  output_dir,
                                  run_uuid,
                                  packagename="smriprep")
        write_derivative_description(bids_dir,
                                     str(Path(output_dir) / "smriprep"))
        write_bidsignore(Path(output_dir) / "smriprep")
    sys.exit(int(errno > 0))
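build_opts isolates the expensive workflow construction in a child process and hands only the needed values back through a Manager dict, so the parent stays lean before it forks the actual workflow. A minimal sketch of that pattern, with build_workflow_stub as a hypothetical stand-in for build_workflow(opts, retval):

import sys
from multiprocessing import Manager, Process


def build_workflow_stub(opts, retval):
    # Hypothetical stand-in: populate the shared dict with whatever the parent needs.
    retval["workflow"] = f"workflow-for-{opts['subject']}"
    retval["return_code"] = 0


def run(opts):
    with Manager() as mgr:
        retval = mgr.dict()
        p = Process(target=build_workflow_stub, args=(opts, retval))
        p.start()
        p.join()
        if p.exitcode != 0:
            sys.exit(p.exitcode)
        # Copy values out of the proxy before the manager shuts down.
        workflow = retval["workflow"]
        retcode = retval["return_code"]
    return workflow, retcode


if __name__ == "__main__":
    print(run({"subject": "sub-01"}))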
Beispiel #37
        # Send frame to global
        write_frame_list[worker_id] = frame_process

        # Expect next worker to write frame
        Global.write_num = next_id(Global.write_num, worker_num)


if __name__ == '__main__':

    # Fix Bug on MacOS
    if platform.system() == 'Darwin':
        set_start_method('forkserver')

    # Global variables
    Global = Manager().Namespace()
    Global.buff_num = 1
    Global.read_num = 1
    Global.write_num = 1
    Global.frame_delay = 0
    Global.is_exit = False
    read_frame_list = Manager().dict()
    write_frame_list = Manager().dict()

    # Number of workers (subprocess use to process frames)
    if cpu_count() > 2:
        worker_num = cpu_count() - 1  # 1 for capturing frames
    else:
        worker_num = 2

    # Subprocess list
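The fragment above shares scalar frame counters between processes through a Manager().Namespace(). A minimal sketch of that idea, with a hypothetical turn counter standing in for the frame bookkeeping:

import time
from multiprocessing import Manager, Process


def worker(shared, worker_id):
    # Spin until it is this worker's turn, then advance the shared counter.
    while shared.turn != worker_id:
        time.sleep(0.01)
    print("worker", worker_id, "running")
    shared.turn += 1


if __name__ == "__main__":
    manager = Manager()
    shared = manager.Namespace()
    shared.turn = 0
    workers = [Process(target=worker, args=(shared, i)) for i in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()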
Beispiel #38
def main():
    print('blas -', dlib.DLIB_USE_BLAS)
    print('cuda -', dlib.DLIB_USE_CUDA)
    print('lapack -', dlib.DLIB_USE_LAPACK)
    print('avx -', dlib.USE_AVX_INSTRUCTIONS)
    print('neon -', dlib.USE_NEON_INSTRUCTIONS)

    manager = Manager()

    # number of detected faces
    count = Value('i', 0)

    time_det = Value('d', 0.0)
    time_count = Value('d', 0.0)

    dets_q = manager.Queue(1)
    images_q = manager.Queue(25)

    # queue for the detection process
    q_for_detproc = manager.Queue(1)

    # queue for the recognition and counting process
    q_for_countproc = manager.Queue(1)

    Process(target=counting_process,
            args=(q_for_countproc, count, time_count),
            daemon=True).start()
    #Process(target=capturing_process, args=(images_q, q_for_detproc), daemon=True).start()
    Process(target=detecting_process,
            args=(q_for_detproc, dets_q, q_for_countproc, time_det),
            daemon=True).start()
    font = cv2.FONT_HERSHEY_SIMPLEX
    counter = 0
    trackers = []
    cap = cv2.VideoCapture(0)
    # cap.set(3,900)
    # cap.set(4,900)

    while True:

        if cap.isOpened():

            img = cap.read()[1]
            #img = imutils.resize(img, width=1000)

            if images_q.full():
                images_q.get()
                images_q.put(img)

            else:
                images_q.put(img)

            if q_for_detproc.empty():
                q_for_detproc.put(img)

        else:
            break

        if images_q.qsize() == 24:
            frame = images_q.get()
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if counter % 25 == 0:
                counter = 0
                trackers = []
                if not dets_q.empty():

                    dets, rgb = dets_q.get()
                    for d in dets:
                        tracker = dlib.correlation_tracker()
                        tracker.start_track(rgb, d)
                        trackers.append(tracker)

            elif len(trackers) > 0:
                for tracker in trackers:
                    confidence = tracker.update(rgb)
                    if confidence > 7:
                        drect = tracker.get_position()
                        left, top, right, bottom = tuple(
                            map(int, (drect.left(), drect.top(), drect.right(),
                                      drect.bottom())))
                        cv2.rectangle(frame, (left, top), (right, bottom),
                                      (0, 0, 255), 2)

            counter += 1
            height, width = frame.shape[:2]
            cv2.putText(frame, str(count.value), (width - 100, height - 100),
                        font, 4, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('img', frame)
            k = cv2.waitKey(25) & 0xff
            if k == 27:
                break

            print('detecting time = ', time_det.value)
            print('counting time = ', time_count.value)
    cap.release()
Beispiel #39
    print('Using default iterations', iterations)

# read data file
data_array = np.loadtxt(filename, delimiter=',')
# number of training examples and features
number_of_examples = data_array.shape[0] 
number_of_parameters = data_array.shape[1]
# add a column of ones to the data array to make looping symmetric
ones = np.ones(number_of_examples)
data_array = np.insert(data_array, 0, values=ones, axis=1)
# initialize theta to zeroes array
theta = np.zeros(number_of_parameters)
parameters_split = np.array_split(list(range(len(theta))), 4)

if __name__ == '__main__':
    manager = Manager()
    results = manager.dict()
    start = time.time()
    for i in range(iterations):
        processes = []
        # multiple processes to calculate theta (4 sublists of theta)
        for j in range(len(parameters_split)):
            processes.append(Process(target=gradientDescent, args=(parameters_split[j], results)))
        theta_diff = []
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        # merge the partial results computed by the worker processes
        for j in range(len(theta)):
            theta_diff.append(results[j])
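The worker function gradientDescent is not shown in this listing. A hypothetical sketch of what it might look like follows, assuming the last column of data_array is the target, alpha is a made-up learning rate, and each process writes one updated parameter value per index into the shared results dict:

def gradientDescent(parameter_indices, results, alpha=0.01):
    # Hypothetical worker: relies on the module-level data_array, theta and
    # number_of_examples defined above; assumes the last column is the target.
    predictions = data_array[:, :-1].dot(theta)
    errors = predictions - data_array[:, -1]
    for j in parameter_indices:
        gradient_j = errors.dot(data_array[:, j]) / number_of_examples
        results[j] = theta[j] - alpha * gradient_j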
Beispiel #40
def one_run(config):
    test_environment = environment.EnvRoad(config)
    road_environment = environment.EnvRoad(config)

    memory_queue = Queue()
    update_p_queue = Queue()
    batch_queue = Queue()

    environment_queue = Queue()
    test_environment_queue = Queue()

    agent_performance_queue = Queue()
    learner_performance_queue = Queue()
    test_agent_performance_queue = Queue()

    priority_environment_queue = Queue()

    manager_weights = Manager()
    weights = manager_weights.dict()
    lock_weights = manager_weights.Lock()

    p_agent = Process(target=thread_agent,
                      args=(config, road_environment, memory_queue,
                            environment_queue, agent_performance_queue,
                            weights, lock_weights))
    p_learn = Process(target=thread_learner,
                      args=(config, batch_queue, learner_performance_queue,
                            update_p_queue, weights, lock_weights))
    p_mem = Process(target=thread_memory,
                    args=(config, memory_queue, batch_queue, update_p_queue,
                          priority_environment_queue))
    p_agent_test = Process(target=thread_agent_test,
                           args=(config, test_environment,
                                 test_environment_queue,
                                 test_agent_performance_queue, weights,
                                 lock_weights, memory_queue))
    p_agent_priority = Process(
        target=thread_agent_priority,
        args=(config, test_environment, priority_environment_queue,
              memory_queue, test_environment_queue, weights, lock_weights))

    start_time = time.time()
    p_agent.start()
    p_mem.start()
    p_learn.start()
    p_agent_test.start()
    p_agent_priority.start()

    if config.display:
        p_display_game = Process(target=thread_game_display,
                                 args=(road_environment, environment_queue))
        p_display_test = Process(target=thread_test_display,
                                 args=(test_environment,
                                       test_environment_queue))
        p_display_game.start()
        p_display_test.start()

        display_agent = displays.AgentDisplays(config)
        display_learner = displays.LearnerDisplays(config)
        display_test = displays.TestDisplays(config)

    learner_performance = [[], [], []]
    agent_performance = [[], []]
    test_agent_performance = [[], []]

    end = False

    # Display all performance metrics
    while True:
        while agent_performance_queue.qsize() > 0:
            agent_performance = agent_performance_queue.get()
            if len(agent_performance[0]) == len(
                    agent_performance[1]) and config.display:
                display_agent.render(agent_performance[0],
                                     agent_performance[1])
            if len(agent_performance[0]) > config.max_step:
                end = True

        while learner_performance_queue.qsize() > 0:
            learner_performance = learner_performance_queue.get()
            if len(learner_performance[0]) == len(
                    learner_performance[1]) == len(
                        learner_performance[2]) and config.display:
                display_learner.render(learner_performance[0],
                                       learner_performance[1],
                                       learner_performance[2])

        while test_agent_performance_queue.qsize() > 0:
            test_agent_performance = test_agent_performance_queue.get()
            if len(test_agent_performance[0]) == len(
                    test_agent_performance[1]) and config.display:
                display_test.render(test_agent_performance[0],
                                    test_agent_performance[1])

        if end:
            if config.display:
                displays.close_all()
                p_display_game.terminate()
                p_display_test.terminate()
            break

    p_agent.terminate()
    p_mem.terminate()
    p_learn.terminate()
    p_agent_test.terminate()
    p_agent_priority.terminate()
    print('Average time per step: ',
          str((time.time() - start_time) / config.max_step))

    return agent_performance[0], agent_performance[1], learner_performance[0], learner_performance[1], \
           learner_performance[0], learner_performance[2], test_agent_performance[0], test_agent_performance[1]
Beispiel #41
from multiprocessing import Queue, Manager


def weight():
    weight = []
    dis_with_w = []
    for i in range(0, 2000):
        weight.append(
            (npy.max(npy.std(vectors, axis=0)) - npy.std(vectors[:, i])) /
            (npy.sum(
                npy.max(npy.std(vectors, axis=0)) - npy.std(vectors, axis=0))))
    return weight


if __name__ == "__main__":
    manager = Manager()
    queue = manager.Queue()
    vectors = npy.ones((1, 2000))
    queue.put(vectors)
    for x in range(0, 40):  # 30 -> pos,10 -> baseline
        mfcc = MFCC('/vibration.wav', queue)
        play = Play()
        play.start()
        time.sleep(0.16)
        mfcc.start()

        play.join()
        mfcc.join()
        time.sleep(1)
        if x == 29:
            print("请抬起手")
Beispiel #42
def main():
    """
        -   Load all the Jupiter configuration
        -   Load DAG information. 
        -   Prepare all of the tasks based on given DAG information. 
        -   Prepare the list of children tasks for every parent task
        -   Generating monitoring process for ``INPUT`` folder.
        -   Generating monitoring process for ``OUTPUT`` folder.
        -   If there are enough input files for the first task on the current node, run the first task. 

    """

    global logging
    logging.basicConfig(level=logging.DEBUG)

    INI_PATH = '/jupiter_config.ini'
    config = configparser.ConfigParser()
    config.read(INI_PATH)

    global dag
    dag_file = '/centralized_scheduler/dag.txt'
    dag_info = k8s_read_dag(dag_file)
    dag = dag_info[1]

    # Prepare transfer-runtime file:
    global runtime_sender_log, RUNTIME, TRANSFER, transfer_type
    RUNTIME = int(config['CONFIG']['RUNTIME'])
    TRANSFER = int(config['CONFIG']['TRANSFER'])

    global app_name, app_option
    app_name = os.environ['APP_NAME']
    app_option = os.environ['APP_OPTION']

    if TRANSFER == 0:
        transfer_type = 'scp'

    runtime_sender_log = open(
        os.path.join(os.path.dirname(__file__), 'runtime_transfer_sender.txt'),
        "w")
    s = "{:<10} {:<10} {:<10} {:<10} \n".format('Node_name', 'Transfer_Type',
                                                'File_Path', 'Time_stamp')
    runtime_sender_log.write(s)
    runtime_sender_log.close()
    runtime_sender_log = open(
        os.path.join(os.path.dirname(__file__), 'runtime_transfer_sender.txt'),
        "a")
    #Node_name, Transfer_Type, Source_path , Time_stamp

    if RUNTIME == 1:
        global runtime_receiver_log
        runtime_receiver_log = open(
            os.path.join(os.path.dirname(__file__),
                         'runtime_transfer_receiver.txt'), "w")
        s = "{:<10} {:<10} {:<10} {:<10} \n".format('Node_name',
                                                    'Transfer_Type',
                                                    'File_path', 'Time_stamp')
        runtime_receiver_log.write(s)
        runtime_receiver_log.close()
        runtime_receiver_log = open(
            os.path.join(os.path.dirname(__file__),
                         'runtime_transfer_receiver.txt'), "a")
        #Node_name, Transfer_Type, Source_path , Time_stamp

    global FLASK_SVC, FLASK_DOCKER, MONGO_PORT, username, password, ssh_port, num_retries, task_mul, count_dict, self_ip, home_ips, home_ids

    FLASK_DOCKER = int(config['PORT']['FLASK_DOCKER'])
    FLASK_SVC = int(config['PORT']['FLASK_SVC'])
    MONGO_PORT = int(config['PORT']['MONGO_DOCKER'])
    username = config['AUTH']['USERNAME']
    password = config['AUTH']['PASSWORD']
    ssh_port = int(config['PORT']['SSH_SVC'])
    num_retries = int(config['OTHER']['SSH_RETRY_NUM'])
    self_ip = os.environ['OWN_IP']
    home_nodes = os.environ['HOME_NODE'].split(' ')
    home_ids = [x.split(':')[0] for x in home_nodes]
    home_ips = [x.split(':')[1] for x in home_nodes]

    global taskmap, taskname, taskmodule, filenames, files_out, home_node_host_ports
    global all_nodes, all_nodes_ips, self_id, self_name, self_task
    global all_computing_nodes, all_computing_ips, node_ip_map, controller_id_map

    configs = json.load(open('/centralized_scheduler/config.json'))
    taskmap = configs["taskname_map"][sys.argv[len(sys.argv) - 1]]
    taskname = taskmap[0]

    global tasks, task_order, super_tasks, non_tasks
    tasks, task_order, super_tasks, non_tasks = get_taskmap()

    global controller_nondag, controller_ip_nondag
    controller_nondag = []
    controller_ip_nondag = []

    global all_nodes_list, all_nodes_ips_list
    all_nodes_list = os.environ['ALL_NODES'].split(':')
    all_nodes_ips_list = os.environ['ALL_NODES_IPS'].split(':')

    all_computing_nodes = os.environ["ALL_COMPUTING_NODES"].split(":")
    all_computing_ips = os.environ["ALL_COMPUTING_IPS"].split(":")
    all_nodes = all_computing_nodes + home_ids
    all_nodes_ips = all_computing_ips + home_ips

    global BOKEH_SERVER, BOKEH_PORT, BOKEH
    BOKEH_SERVER = config['BOKEH_LIST']['BOKEH_SERVER']
    BOKEH_PORT = int(config['BOKEH_LIST']['BOKEH_PORT'])
    BOKEH = int(config['BOKEH_LIST']['BOKEH'])

    for idx, controller in enumerate(all_nodes_list):
        if controller in super_tasks:
            logging.debug(controller)
            logging.debug(all_nodes_ips_list[idx])
            controller_nondag.append(controller)
            controller_ip_nondag.append(all_nodes_ips_list[idx])

            all_nodes.append(controller)
            all_nodes_ips.append(all_nodes_ips_list[idx])

    if taskmap[1] == True:
        taskmodule = __import__(taskname)

    #target port for SSHing into a container
    filenames = []
    files_out = []
    self_name = os.environ['NODE_NAME']
    self_id = os.environ['NODE_ID']
    self_task = os.environ['TASK']
    controller_id_map = self_task + "#" + self_id
    home_node_host_ports = [x + ":" + str(FLASK_SVC) for x in home_ips]
    node_ip_map = dict(zip(all_nodes, all_nodes_ips))

    global dest_node_host_port_list
    dest_node_host_port_list = [
        ip + ":" + str(FLASK_SVC) for ip in all_computing_ips
    ]

    global task_price_cpu, task_node_summary, task_price_mem, task_price_queue, task_price_net
    manager = Manager()
    task_price_cpu = manager.dict()
    task_price_mem = manager.dict()
    task_price_queue = manager.dict()
    task_price_net = manager.dict()
    task_node_summary = manager.dict()

    global pass_time
    pass_time = dict()

    # Set up default value for task_node_summary: the task controller will perform the tasks also

    _thread.start_new_thread(push_controller_map, ())

    web_server = MonitorRecv()
    web_server.run()

    if taskmap[1] == True:
        task_mul = manager.dict()
        count_dict = manager.dict()

    else:

        path_src = "/centralized_scheduler/" + taskname
        args = ' '.join(str(x) for x in taskmap[2:])

        if os.path.isfile(path_src + ".py"):
            cmd = "python3 -u " + path_src + ".py " + args
        else:
            cmd = "sh " + path_src + ".sh " + args
        os.system(cmd)
Beispiel #43
def worker_process(server, dict_proxy, queue_proxy):
    thread_pool = ThreadPool(cpu_count() * 2)
    while True:
        connection, remote_address = server.accept()
        data = "".encode()
        login_try(connection, thread_pool, dict_proxy, queue_proxy, data)


if __name__ == '__main__':

    server = socket.socket()
    server.bind(('127.0.0.1', 9999))
    server.listen(1000)

    mgr = Manager()
    dict_proxy = mgr.dict()  # holds the clients that have connected
    queue_proxy = mgr.Queue()  # messages from the clients are passed along through this queue

    n = cpu_count()  # number of CPU cores of the current machine
    process_pool = Pool(n)
    for i in range(n - 1):  # make full use of the CPU: one worker process per core
        process_pool.apply_async(worker_process,
                                 args=(server, dict_proxy,
                                       queue_proxy))  # hand the listening socket to the worker processes

    process_pool.apply_async(send_data,
                             args=(dict_proxy, queue_proxy))  # one process to send and receive messages

    process_pool.close()
    process_pool.join()
def extract_descriptors_from_file_to_pickle(inputfile, outputfile, num_pos_sample=0):
    print("Working on: " + str(inputfile))
    print(" ")
    s_read_seq = time.time()
    if reduce_by_similarity == 1:
        if "_reduced" in inputfile:
            print("File already reduced to be maximum 90 percent identical! Clear reduce_by_similarity!")
            input()
        elif ".txt" in inputfile:
            name = inputfile.replace('.txt', '')
            file_to_reduce = open(inputfile)
            lines = file_to_reduce.readlines()
            if num_pos_sample != 0:
                lines = lines[:round(sc_1*num_pos_sample)]
            line_number = len(lines)
            file_to_reduce.close()
        elif ".fasta" in inputfile:
            name = inputfile.replace('.fasta', '')
            lines = IO.read_fasta_file(inputfile)
            lines = [str(line) for line in lines]
            if num_pos_sample != 0:
                lines = lines[:round(sc_1*num_pos_sample)]
            line_number = len(lines)
        else:
            print("Unknown file format! Use .fasta or .txt! Press CTRL-C to exit")
            input()

        out = name + "_reduced.txt"
        deleted = []
        sim_array = np.zeros((line_number, line_number))

        for i in list(range(line_number)):
            print("Doing line %d out of %d" %(i, line_number))
            string1 = lines[i].strip()
            for j in list(range(i+1, line_number)):
                #print(j)
                string2 = lines[j].strip()
                if similar(string1, string2) >= 0.9:
                    sim_array[i,j] = 1
                    sim_array[j,i] = 1

        while np.sum(np.sum(sim_array, 0)) != 0:
            sum_arr = np.sum(sim_array, 0)
            idx_to_be_deleted = np.argmax(sum_arr)
            sim_array = np.delete(sim_array, idx_to_be_deleted, 0)
            sim_array = np.delete(sim_array, idx_to_be_deleted, 1)
            deleted.append(lines[idx_to_be_deleted])
            del lines[idx_to_be_deleted]

        print("Deleted items:")
        [print(item) for item in deleted]

        f = open(out, "w+")
        for line in lines:
            f.write(line)
            f.write("\n")
        f.close()

        inputfile = out

    if ".txt" in inputfile:
        seqs = []
        with open(inputfile) as f:
            for line in f:
                seqs.append(line.strip())  # strip is important, otherwise a trailing '\n' causes issues
        inputfile = inputfile.replace("_reduced.txt", "")
    elif ".fasta" in inputfile:
        seqs = IO.read_fasta_file(inputfile)
        inputfile = inputfile.replace("_reduced.fasta", "")
    else:
        print("Unknown file format! Use .fasta or .txt! Press CTRL-C to exit")
        input()
    e_read_seq = time.time()
    print("Total time to read sequences: " + str(e_read_seq - s_read_seq))
    print(str(len(seqs)))
    chars = set('ARNDCQEGHILKMFPSTWYV')

    if inputfile in negfile:
        if num_pos_sample == 0:
            print ("Error, use Ctrl-C to quit")
            input()
        print(num_pos_sample)
        if num_pos_sample > len(seqs):
            print("Warning: Class imbalance may not be achieved! Click any button to accept or CTRL-C to exit")
            input()
        a = random.sample(range(1, len(seqs)), round(sc_2*num_pos_sample)) #if total_samples is big, you may want to divide total_samples (by 18) and round it
        newseqs = []
        i = 1
        for number in a:
            print(i)
            if len(seqs[number]) > minlength and all((c in chars) for c in seqs[number].upper()):
                newseqs.append(seqs[number])
                print(seqs[number])
                i = i+1
            if i > num_pos_sample:
                break
        seqs = newseqs
    #s_x_desc = time.time()
    dvecs = Manager().list()
    current_seq = Value('i', 1)
    dropped = 0
    lock = Lock()
    seqs = [s.upper() for s in seqs]
    mask = [all((c in chars) for c in s) and len(s) > minlength for s in seqs]
    seqs = list(compress(seqs, mask))
    total_samples = len(seqs)
    pool = Pool(numcores, initializer, (current_seq, dvecs, total_samples, lock))
    s_parallel = time.time()
    pool.map(thefunction, seqs)
    e_parallel = time.time()
    #pool.close()
    #pool.join()
    print("Total time to extract descriptors: " + str(e_parallel - s_parallel))
    if inputfile in posfile:
        num_pos_sample = len(dvecs)
        print("Number of positive samples: %d" %(num_pos_sample))
    #e_x_desc = time.time()
    #print("Total time to extract descriptors: " + str(e_x_desc - s_x_desc))
    print("Number of samples dropped due to meaningless characters: %d" %(dropped))

    y = dvecs._callmethod('__getitem__', (slice(0, total_samples),))  # copy the managed list into a plain list so it can be pickled
    IO.serialize_descriptor_vector(y, o_file=outputfile)

    return num_pos_sample
Beispiel #45
class ReaderWriterLock(object):
    def __init__(self):
        self.num_readers_lock = Manager().Lock()
        self.writers_lock = Manager().Lock()
        self.num_readers = 0
        self.now_writing = False

    def some_worker_is_reading(self):
        return self.num_readers > 0

    def some_worker_is_writing(self):
        return self.now_writing is True

    def lock_writing_and_reading(self):
        self.writers_lock.acquire()  # first things first - block all other writers
        self.now_writing = True  # block new readers who haven't started reading yet
        while self.some_worker_is_reading():  # let existing readers finish their homework
            time.sleep(0.05)

    def release_writing_and_reading(self):
        self.now_writing = False  # release readers - guarantee no readers starvation
        self.writers_lock.release()  # release writers

    def lock_writing(self):
        while self.now_writing:
            time.sleep(0.05)

        self.num_readers_lock.acquire()
        self.num_readers += 1
        self.num_readers_lock.release()

    def release_writing(self):
        self.num_readers_lock.acquire()
        self.num_readers -= 1
        self.num_readers_lock.release()
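A short usage sketch for the ReaderWriterLock above. Note the naming: lock_writing()/release_writing() are what a reader calls (they bump the reader count to keep writers out), while lock_writing_and_reading()/release_writing_and_reading() are the writer's exclusive path. shared_store is just an illustrative dict-like object.

def read_value(rw_lock, shared_store, key):
    # Reader path: blocks new writers via the reader count, allows other readers.
    rw_lock.lock_writing()
    try:
        return shared_store.get(key)
    finally:
        rw_lock.release_writing()


def write_value(rw_lock, shared_store, key, value):
    # Writer path: exclusive; waits for in-flight readers to drain first.
    rw_lock.lock_writing_and_reading()
    try:
        shared_store[key] = value
    finally:
        rw_lock.release_writing_and_reading()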
Beispiel #46
def sub_cmd_multisearch(args):
    if not (args.m and args.sc):
        exit(1)

    config = xq.get_strategy_config(args.sc)
    pprint.pprint(config)

    module_name = config["module_name"].replace("/", ".")
    class_name = config["class_name"]
    symbol = config['symbol']
    md = DBMD(args.m, kl.KLINE_DATA_TYPE_JSON)
    start_time, end_time = get_time_range(md, symbol, args.r)

    count = args.count
    cpus = cpu_count()
    print("count: %s,  cpus: %s" % (count, cpus) )

    result_q = Manager().Queue()  # only a Queue from a Manager can be shared with a Pool
    task_q = Manager().Queue()  # only a Queue from a Manager can be shared with a Pool
    for index in range(count):
        task_q.put(index)

    print('Parent process %s.' % os.getpid())
    p = Pool(cpus)
    for i in range(cpus):
        #p.apply_async(child_process_test, args=(i, task_q, result_q))
        p.apply_async(child_process, args=(i, task_q, result_q, args.m, config, module_name, class_name, start_time, end_time))
    print('Waiting for all subprocesses done...')
    p.close()

    start_time = datetime.now()
    result = []
    while len(result) < count:
        if result_q.empty():
            time.sleep(1)
        else:
            value = result_q.get()
            print("result value: ", value)
            result.append(value)

        sys.stdout.write(
            "  %d/%d,  cost: %s,  progress: %g%% \r"
            % (
                len(result),
                count,
                datetime.now() - start_time,
                round((len(result) / count) * 100, 2)
            )
        )
        sys.stdout.flush()

    print("")
    #print("result queue(len: %s)" % (result_q.qsize()))

    p.join()
    print('All subprocesses done.')

    sorted_rs = sorted(result, key=lambda x: x[1][0], reverse=True)
    for r in sorted_rs:
        #print("r: ", r)
        info = "%6s    %30s    %s " % r
        print(info)
def do_work(in_queue, out_list):
    while True:
        art = in_queue.get()
        if art is None:  # None is the sentinel telling the worker to stop
            break
        else:
            result = multipro(art)
            out_list.append(result)
            # return result



import itertools

num_workers = 3
manager = Manager()
results = manager.list()
work = manager.Queue(num_workers)
pool_lst = []
for i in range(num_workers):
    p = Process(target=do_work, args=(work, results))
    pool_lst.append(p)
    p.start()

articles = articles*5000000

articles = itertools.chain(articles, (None,)*num_workers)

for i in articles:
    work.put(i)
Beispiel #48
    def __init__(self, host=None, username=None, password=None, fresh=False):

        # database
        self.host = host
        self.username = username
        self.password = password
        self.databaseName = 'XRP_Ledger'
        self.collectionsList = ['accounts', 'transactions']
        self.collections = {}
        self.edgeCollectionsList = ['transactionOutput']
        self.edgeCollections = {}

        # processes
        self.maxProcess = int(cpu_count() / 2)
        self.batchSize = 500
        self.maxQueueSize = self.batchSize * self.maxProcess

        # queue
        self.accountsQueue = Manager().Queue(maxsize=self.maxQueueSize)
        self.transactionsQueue = Manager().Queue(maxsize=self.maxQueueSize)
        self.transactionsOutputQueue = Manager().Queue(
            maxsize=self.maxQueueSize)

        # tracking
        self.lastStoredSeq = None

        # create connection
        try:
            conn = Connection(arangoURL=host,
                              username=username,
                              password=password)
        except ConnectionError:
            print("Unable to establish connection to the database")
            sys.exit(1)

        # setup database
        try:
            db = conn.createDatabase(name=self.databaseName)
        except CreationError:
            db = conn[self.databaseName]

        if fresh:
            for collection in self.collectionsList + self.edgeCollectionsList:
                if db.hasCollection(collection):
                    db.collections[collection].delete()
            db.reload()

        # setup collections
        for collection in self.collectionsList:
            if not db.hasCollection(collection):
                db.createCollection(name=collection, className='Collection')

        # setup edge collections
        for edge in self.edgeCollectionsList:
            if not db.hasCollection(edge):
                db.createCollection(name=edge, className='Edges')

        # set last processed ledger seq
        aql = "FOR tx IN transactions SORT tx.LedgerIndex DESC LIMIT 1 RETURN tx.LedgerIndex"
        queryResult = db.AQLQuery(aql, rawResults=True)
        if len(queryResult) > 0:
            self.lastStoredSeq = queryResult[0]

        # start the bulk-insert worker processes
        self.processes = []

        for i in range(self.maxProcess):
            self.processes.append(
                BulkInsert(self.get_connection('accounts'), self.accountsQueue,
                           self.batchSize))
            self.processes.append(
                BulkInsert(self.get_connection('transactions'),
                           self.transactionsQueue, self.batchSize))
            self.processes.append(
                BulkInsert(self.get_connection('transactionOutput'),
                           self.transactionsOutputQueue, self.batchSize))

        for t in self.processes:
            t.start()
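# Sketch (BulkInsert is not shown in this snippet): a hypothetical consumer that drains one
# of the Manager queues and writes documents to its collection in batches. The bulkSave call
# is an assumption about the pyArango API returned by get_connection().
from multiprocessing import Process

class BulkInsertSketch(Process):
    def __init__(self, collection, queue, batch_size):
        super().__init__()
        self.collection = collection
        self.queue = queue
        self.batch_size = batch_size

    def run(self):
        batch = []
        while True:
            doc = self.queue.get()  # blocks until a document (or a None sentinel) arrives
            if doc is None:
                break
            batch.append(doc)
            if len(batch) >= self.batch_size:
                self.collection.bulkSave(batch)
                batch = []
        if batch:
            self.collection.bulkSave(batch)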
import os
from time import sleep


def fuc_w(num, queue):
    print('start {0}:{1}'.format(num, os.getpid()))
    for i in range(5):
        queue.put(i)
    sleep(1)
    print('finish %s' % num)


def fuc_r(num, queue):
    print('start {0}:{1}'.format(num, os.getpid()))
    if not queue.empty():
        for i in range(queue.qsize()):
            print(queue.get(block=True, timeout=5))
    sleep(1)
    print('finish %s' % num)


if __name__ == '__main__':
    print('main process:%s' % os.getpid())
    # Specify the queue size; if omitted, the queue is unbounded
    queue = Manager().Queue(5)
    pool = Pool(processes=5)
    pool.apply(func=fuc_w, args=('write', queue))
    pool.apply(func=fuc_r, args=('read', queue))
    pool.close()
    pool.join()
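# Note: a plain multiprocessing.Queue cannot be passed as an argument to Pool workers
# (it raises "Queue objects should only be shared between processes through inheritance");
# Manager().Queue() returns a picklable proxy, which is why it is used with Pool above.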
Beispiel #51
0
if __name__ == "__main__":
    test_1 = False
    test_2 = True
    if test_1:
        mean_vals = []
        for i in range(10):
            latency_dict = []
            iter_val = 100
            in_order(latency_dict, iter_val)
            mean_val = sum(latency_dict) / len(latency_dict)
            mean_vals.append(mean_val)
        out = sum(mean_vals) / len(mean_vals)
        print(mean_vals)
        print("MEAN ONE REQUEST", out)
    if test_2:
        manager = Manager()
        return_dict = manager.list()
        # pool_size = 100  # your "parallelness"
        # data to be sent to api
        num_requests = [1]
        for nr in num_requests:
            return_dict = manager.list()
            concurrent_vals(nr)
            max_val = test_max(return_dict)
            avg_val = test_avg(return_dict)
            # print(return_dict)
            print("NUM_REQUESTS", nr)
            print("MAX_VAL", max_val)
            print("AVG_VAL", avg_val)
Beispiel #52
0
def configure_parallel(**kwargs):

    basic_conf, modules_fn, summary_writer = basic_configure(**kwargs)

    jobdir = basic_conf['jobdir']
    env_spec = basic_conf['env_spec']
    T = env_spec['T']
    seed = basic_conf['seed']
    num_worker = basic_conf['num_worker']
    env_fn = modules_fn['env_fn']
    tasks_fn = modules_fn['tasks_fn']
    policies_fn = modules_fn['policies_fn']
    gnets_fn = modules_fn['gnets_fn']
    task_selector_fn = modules_fn['task_selector_fn']
    task_planner_fn = modules_fn['task_planner_fn']
    forward_model_fn = modules_fn['forward_model_fn']
    rollout_worker_fn = modules_fn['rollout_worker_fn']

    ##########################
    #    Continue training   #
    ##########################

    restart_after = get_parameter('restart_after', params=kwargs, default=None)
    continued_params = {}
    if restart_after is not None and os.path.exists(os.path.join(jobdir, 'restart')):
        with open(os.path.join(jobdir, 'parallel_params.json'), 'r') as f:
            continued_params = json.load(f)

    ##########################
    #  Load external config  #
    ##########################

    parallel_params_path = get_parameter('basic_params_path', params=continued_params, default=None)
    parallel_params_path = get_parameter('basic_params_path', params=kwargs, default=parallel_params_path)
    external_params = {}
    if parallel_params_path is not None:
        with open(parallel_params_path, 'r') as f:
            external_params = json.load(f)

    ###################################
    #  Prepare shared memory manager  #
    ###################################

    manager = Manager()
    episode = manager.dict()
    info = manager.dict()
    managed_memory = dict(
        episode=episode,
        info=info,
    )

    ##########################
    # Prepare rollout worker #
    ##########################

    parallel_rollout_manager_params = dict(
        num_worker=num_worker,
    )
    update_default_params(parallel_rollout_manager_params, external_params.get('parallel_rollout_manager_params', {}))
    update_default_params(parallel_rollout_manager_params, continued_params.get('parallel_rollout_manager_params', {}))
    update_default_params(parallel_rollout_manager_params, kwargs.get('parallel_rollout_manager_params', {}))

    parallel_rollout_manager = ParallelRolloutManager(env_spec, env_fn, tasks_fn, policies_fn, gnets_fn, task_selector_fn,
                                                      task_planner_fn, forward_model_fn, rollout_worker_fn, T,
                                                      managed_memory=managed_memory, **parallel_rollout_manager_params)

    ##########################
    #  Prepare train worker  #
    ##########################

    parallel_train_manager = ParallelTrainManager(managed_memory=managed_memory, summary_writer=summary_writer,
                                                  jobdir=jobdir, seed=seed)
    [parallel_train_manager.add_module(policy_fn) for policy_fn in policies_fn]
    [parallel_train_manager.add_module(task_fn) for task_fn in tasks_fn]
    parallel_train_manager.add_module(forward_model_fn)
    [parallel_train_manager.add_module(gnets_fn[i][j]) for i in range(len(tasks_fn)) for j in range(len(tasks_fn))]
    parallel_train_manager.add_module(task_selector_fn)
    parallel_train_manager.add_module(task_planner_fn)

    ##########################
    #  Load external params  #
    ##########################

    params_path = basic_conf['params_path']
    params_prefix = basic_conf['params_prefix']

    if params_path:
        params_path = params_path if params_path else jobdir
        try:
            parallel_train_manager.load_global_params(path=params_path, prefix=params_prefix)
            logger.info(f'Restored params from {params_path} with prefix {params_prefix}')
        except:
            logger.warning('Could not restore params')
            raise

    ##########################
    #    Continue training   #
    ##########################

    if basic_conf['restart_after'] is not None and os.path.exists(os.path.join(jobdir, 'restart')):
        try:
            parallel_train_manager.load_global_params(path=jobdir, prefix='latest')
            parallel_train_manager.restore_buffer(path=jobdir, prefix='latest')
            logger.info(f'Restored params and buffer from {jobdir} with prefix latest')
        except:
            logger.warning('Could not restore params')
            raise

    params = dict(
        parallel_rollout_manager_params=parallel_rollout_manager_params,
    )

    with open(os.path.join(jobdir, 'parallel_params.json'), 'w') as f:
        json.dump(params, f)

    return parallel_rollout_manager, parallel_train_manager, basic_conf, params, modules_fn, managed_memory, summary_writer
    def other(self, *args):
        self.request.sendall('error command: {}'.format(' '.join(args[0])).encode())


def run_server(host, port, request_q_size=50):
    socketserver.ThreadingTCPServer.allow_reuse_address = True
    server = socketserver.ThreadingTCPServer((host, port), MYHandler)
    server.request_queue_size = request_q_size
    print('start server on {}:{}'.format(host, port))
    with server:
        server.serve_forever()


if __name__ == '__main__':
    manager = Manager()
    queue = manager.Queue(5)
    error_ip_list = manager.list()
    process_list = []
    pull_ip_num = 50
    order_id = ''
    api_url = 'http://dps.kdlapi.com/api/getdps/?orderid={}&num={}&area=%E6%B2%B3%E5%8D%97%2C%E5%90%89%E6%9E%97%2C%E5%B1%B1%E8%A5%BF%2C%E5%B1%B1%E4%B8%9C%2C%E6%B9%96%E5%8C%97%2C%E5%86%85%E8%92%99%E5%8F%A4%2C%E5%AE%89%E5%BE%BD%2C%E7%94%98%E8%82%83%2C%E5%AE%81%E5%A4%8F%2C%E5%9B%9B%E5%B7%9D%2C%E5%B9%BF%E8%A5%BF&pt=1&dedup=1&sep=2'.format(
        order_id, pull_ip_num)
    signature = ''
    check_ip_expired_url = 'https://dps.kdlapi.com/api/checkdpsvalid?orderid={}&signature={}&proxy='.format(order_id,
                                                                                                            signature)
    get_ip_balance = 'https://dps.kdlapi.com/api/getipbalance?orderid={}&signature={}'.format(order_id, signature)
    operate_ip = OperateIP(api_url, check_ip_expired_url, get_ip_balance)
    get_ip_process = Process(target=operate_ip.get_ip_list)
    process_list.append(get_ip_process)
    check_ip_process = Process(target=operate_ip.check_error_ip)
def Run_model(data, D, Choose_P, training_step, temp, d, detection_key,
              local_K):
    if detection_key == 1:
        D = 1
        print('Start point-to-point anomaly detection method... ')
        if len(Choose_P) == 0:
            print('Detection target: all variables!')
        else:
            print('Detection target: specific variables!')
    if detection_key == 2:
        print('Start interval anomaly detection method... ')
        if len(Choose_P) == 0:
            print('Detection target: all variables!')
        else:
            print('Detection target: specific variables!')
    if detection_key == 3:
        D = 1
        print('Start local anomaly detection method... ')
        if len(Choose_P) == 0:
            print('Detection target: all variables!')
        else:
            print('Detection target: specific variables!')
    data = Normalize_function(data)  # Data normalization

    data = del_zero_matrix(data)  # Delete useless variables
    N = len(data[0])  # Data length, i.e. the number of nodes
    K_temp = np.identity(N)  # Initialize an identity matrix
    P = len(data)  # Number of variables

    if len(Choose_P) == 0:  # If it is empty, it means there is no choice, and all are selected by default
        Choose_P = np.arange(0, P)  # Select the variable information to be focused on

    print('The system detects ' + str(len(Choose_P)) +
          ' target variables and starts ' + str(len(Choose_P)) +
          ' processes ...')

    jobs = []  # Holds the worker processes
    common_data = Manager().list()  # A shared variable: a Manager list proxy
    for i in range(len(Choose_P)):  #Start the corresponding process
        p = Process(target=Aligned_kernel_matrix,
                    args=(i, data, N, D, P, Choose_P[i], temp, detection_key,
                          local_K,
                          common_data))  #Share the common_data variable
        jobs.append(p)
        p.start()  #Start process

    for proc in jobs:
        proc.join()  # Use blocking to wait for all processes to end before proceeding

    K_temp = np.identity(N)  # Re-initialize as an identity matrix
    for i in range(len(Choose_P)):
        K_temp = np.matmul(K_temp, common_data[i])  # Multiply all the per-variable matrices together

    S = Normalize_matrix(K_temp)  # Normalized

    print('The calculation is over... ')

    c = np.array([1] * N)
    c = Train_model(S, c, d, N, training_step)

    return c
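# Note on the snippet above: common_data is a Manager().list() proxy, so each worker process
# can append its kernel matrix to it and the parent reads the copies back via common_data[i].
# The workers (Aligned_kernel_matrix, not shown here) are assumed to append exactly one matrix each.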
Beispiel #55
0
def fill_volume_with_model(model_file,
                           volume,
                           resume_prediction=None,
                           checkpoint_filename=None,
                           checkpoint_label_interval=20,
                           seed_generator='sobel',
                           background_label_id=0,
                           bias=True,
                           move_batch_size=1,
                           max_moves=None,
                           max_bodies=None,
                           num_workers=CONFIG.training.num_gpus,
                           worker_prequeue=1,
                           filter_seeds_by_mask=True,
                           reject_non_seed_components=True,
                           reject_early_termination=False,
                           remask_interval=None,
                           shuffle_seeds=True):
    subvolume = volume.get_subvolume(
        SubvolumeBounds(start=np.zeros(3, dtype=np.int64), stop=volume.shape))
    # Create an output label volume.
    if resume_prediction is None:
        prediction = np.full_like(subvolume.image,
                                  background_label_id,
                                  dtype=np.uint64)
        label_id = 0
    else:
        if resume_prediction.shape != subvolume.image.shape:
            raise ValueError('Resume volume prediction is wrong shape.')
        prediction = resume_prediction
        prediction.flags.writeable = True
        label_id = prediction.max()
    # Create a conflict count volume that tracks locations where segmented
    # bodies overlap. For now the first body takes precedence in the
    # predicted labels.
    conflict_count = np.full_like(prediction, 0, dtype=np.uint32)

    def worker(worker_id, set_devices, model_file, image, seeds, results, lock,
               revoked):
        lock.acquire()
        import tensorflow as tf

        if set_devices:
            # Only make one GPU visible to Tensorflow so that it does not allocate
            # all available memory on all devices.
            # See: https://stackoverflow.com/questions/37893755
            os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
            os.environ['CUDA_VISIBLE_DEVICES'] = str(worker_id)

        with tf.device('/gpu:0'):
            # Late import to avoid Keras import until TF bindings are set.
            from .network import load_model

            logging.debug('Worker %s: loading model', worker_id)
            model = load_model(model_file, CONFIG.network)
        lock.release()

        def is_revoked(test_seed):
            ret = False
            lock.acquire()
            if tuple(test_seed) in revoked:
                ret = True
                revoked.remove(tuple(test_seed))
            lock.release()
            return ret

        while True:
            seed = seeds.get(True)

            if not isinstance(seed, np.ndarray):
                logging.debug('Worker %s: got DONE', worker_id)
                break

            if is_revoked(seed):
                results.put((seed, None))
                continue

            def stopping_callback(region):
                stop = is_revoked(seed)
                if reject_non_seed_components and \
                   region.bias_against_merge and \
                   region.mask[tuple(region.seed_vox)] < 0.5:
                    stop = True
                return stop

            logging.debug('Worker %s: got seed %s', worker_id,
                          np.array_str(seed))

            # Flood-fill and get resulting mask.
            # Allow reading outside the image volume bounds to allow segmentation
            # to fill all the way to the boundary.
            region = Region(image,
                            seed_vox=seed,
                            sparse_mask=True,
                            block_padding='reflect')
            region.bias_against_merge = bias
            early_termination = False
            try:
                six.next(
                    region.fill(model,
                                move_batch_size=move_batch_size,
                                max_moves=max_moves,
                                progress=2 + worker_id,
                                stopping_callback=stopping_callback,
                                remask_interval=remask_interval))
            except Region.EarlyFillTermination:
                early_termination = True
            except StopIteration:
                pass
            if reject_early_termination and early_termination:
                body = None
            else:
                body = region.to_body()
            logging.debug('Worker %s: seed %s filled', worker_id,
                          np.array_str(seed))

            results.put((seed, body))

    # Generate seeds from volume.
    generator = preprocessing.SEED_GENERATORS[seed_generator]
    seeds = generator(subvolume.image, CONFIG.volume.resolution)

    if filter_seeds_by_mask and volume.mask_data is not None:
        seeds = [
            s for s in seeds
            if volume.mask_data[tuple(volume.world_coord_to_local(s))]
        ]

    pbar = tqdm(desc='Seed queue', total=len(seeds), miniters=1, smoothing=0.0)
    label_pbar = tqdm(desc='Labeled vox',
                      total=prediction.size,
                      miniters=1,
                      smoothing=0.0,
                      position=1)
    num_seeds = len(seeds)
    if shuffle_seeds:
        random.shuffle(seeds)
    seeds = iter(seeds)

    manager = Manager()
    # Queue of seeds to be picked up by workers.
    seed_queue = manager.Queue()
    # Queue of results from workers.
    results_queue = manager.Queue()
    # Dequeue of seeds that were put in seed_queue but have not yet been
    # combined by the main process.
    dispatched_seeds = deque()
    # Seeds that were placed in seed_queue but subsequently covered by other
    # results before their results have been processed. This allows workers to
    # abort working on these seeds by checking this list.
    revoked_seeds = manager.list()
    # Results that have been received by the main process but have not yet
    # been combined because they were not received in the dispatch order.
    unordered_results = {}

    def queue_next_seed():
        total = 0
        for seed in seeds:
            if prediction[seed[0], seed[1], seed[2]] != background_label_id:
                # This seed has already been filled.
                total += 1
                continue
            dispatched_seeds.append(seed)
            seed_queue.put(seed)

            break

        return total

    for _ in range(min(num_seeds, num_workers * worker_prequeue)):
        processed_seeds = queue_next_seed()
        pbar.update(processed_seeds)

    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        set_devices = False
        num_workers = 1
        logging.warn(
            'Environment variable CUDA_VISIBLE_DEVICES is set, so only one worker can be used.\n'
            'See https://github.com/aschampion/diluvian/issues/11')
    else:
        set_devices = True

    workers = []
    loading_lock = manager.Lock()
    for worker_id in range(num_workers):
        w = Process(target=worker,
                    args=(worker_id, set_devices, model_file, subvolume.image,
                          seed_queue, results_queue, loading_lock,
                          revoked_seeds))
        w.start()
        workers.append(w)

    last_checkpoint_label = label_id

    # For each seed, create region, fill, threshold, and merge to output volume.
    while dispatched_seeds:
        processed_seeds = 1
        expected_seed = dispatched_seeds.popleft()
        logging.debug('Expecting seed %s', np.array_str(expected_seed))

        if tuple(expected_seed) in unordered_results:
            logging.debug('Expected seed %s is in old results',
                          np.array_str(expected_seed))
            seed = expected_seed
            body = unordered_results[tuple(seed)]
            del unordered_results[tuple(seed)]

        else:
            seed, body = results_queue.get(True)
            processed_seeds += queue_next_seed()

            while not np.array_equal(seed, expected_seed):
                logging.debug('Seed %s is early, stashing', np.array_str(seed))
                unordered_results[tuple(seed)] = body
                seed, body = results_queue.get(True)
                processed_seeds += queue_next_seed()

        logging.debug('Processing seed at %s', np.array_str(seed))
        pbar.set_description('Seed ' + np.array_str(seed))
        pbar.update(processed_seeds)

        if prediction[seed[0], seed[1], seed[2]] != background_label_id:
            # This seed has already been filled.
            logging.debug(
                'Seed (%s) was filled but has been covered in the meantime.',
                np.array_str(seed))
            loading_lock.acquire()
            if tuple(seed) in revoked_seeds:
                revoked_seeds.remove(tuple(seed))
            loading_lock.release()
            continue

        if body is None:
            logging.debug('Body was None.')
            continue

        if reject_non_seed_components and not body.is_seed_in_mask():
            logging.debug('Seed (%s) is not in its body.', np.array_str(seed))
            continue

        if reject_non_seed_components:
            mask, bounds = body.get_seeded_component(
                CONFIG.postprocessing.closing_shape)
        else:
            mask, bounds = body._get_bounded_mask()

        body_size = np.count_nonzero(mask)

        if body_size == 0:
            logging.debug('Body was empty.')
            continue

        # Generate a label ID for this region.
        label_id += 1
        if label_id == background_label_id:
            label_id += 1

        logging.debug('Adding body to prediction label volume.')
        bounds_shape = list(map(slice, bounds[0], bounds[1]))
        prediction_mask = prediction[bounds_shape] == background_label_id
        for seed in dispatched_seeds:
            if np.all(bounds[0] <= seed) and np.all(
                    bounds[1] > seed) and mask[tuple(seed - bounds[0])]:
                loading_lock.acquire()
                if tuple(seed) not in revoked_seeds:
                    revoked_seeds.append(tuple(seed))
                loading_lock.release()
        conflict_count[bounds_shape][np.logical_and(
            np.logical_not(prediction_mask), mask)] += 1
        label_shape = np.logical_and(prediction_mask, mask)
        prediction[bounds_shape][np.logical_and(prediction_mask,
                                                mask)] = label_id

        label_pbar.set_description('Label {}'.format(label_id))
        label_pbar.update(np.count_nonzero(label_shape))
        logging.info('Filled seed (%s) with %s voxels labeled %s.',
                     np.array_str(seed), body_size, label_id)

        if max_bodies and label_id >= max_bodies:
            # Drain the queues.
            while not seed_queue.empty():
                seed_queue.get_nowait()
            break

        if checkpoint_filename is not None and label_id - last_checkpoint_label > checkpoint_label_interval:
            config = HDF5Volume.write_file(checkpoint_filename + '.hdf5',
                                           CONFIG.volume.resolution,
                                           label_data=prediction)
            config['name'] = 'segmentation checkpoint'
            with open(checkpoint_filename + '.toml', 'wb') as tomlfile:
                tomlfile.write('# Filling model: {}\n'.format(model_file))
                tomlfile.write(str(toml.dumps({'dataset': [config]})))

    for _ in range(num_workers):
        seed_queue.put('DONE')
    for wid, worker in enumerate(workers):
        worker.join()
    manager.shutdown()

    label_pbar.close()
    pbar.close()

    return prediction, conflict_count
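# Note on the snippet above: the Manager supplies the seed and result queues, the shared
# revoked_seeds list and the loading_lock, so GPU workers load their models one at a time and
# can abandon seeds that were already covered by another body before their turn came.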
def store_thumbnails(image_file, thumbnail_dictionary):
    colors = process_images(image_file)
    # print(colors)
    if colors:
        # print(colors)
        for item in colors:

            if item[0] in thumbnail_dictionary:
                thumbnail_dictionary[item[0]] += [image_file]
            else:
                thumbnail_dictionary[item[0]] = [image_file]


if __name__ == "__main__":
    manager = Manager()
    image_thumbnail_dictionary = manager.dict()

    picture_folder_name = sys.argv[1]
    current_folder = os.getcwd()
    base_folder_name = os.path.basename(picture_folder_name)

    data_json_file = os.path.join(current_folder + os.sep + "data" + os.sep +
                                  str(base_folder_name + "_data.json"))

    f = find_pictures(picture_folder_name)
    # print(f)

    # print(image_thumbnail_dictionary)

    pool = Pool(cpu_count())
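    # Sketch (the original snippet ends here): the Manager dict proxy can be handed to Pool
    # workers, e.g. assuming find_pictures() returned an iterable of image paths:
    pool.starmap(store_thumbnails, [(image, image_thumbnail_dictionary) for image in f])
    pool.close()
    pool.join()
    print(dict(image_thumbnail_dictionary))  # or serialize the result to data_json_file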
Beispiel #57
0
from multiprocessing import Manager, Pool
import os, time, random


def reader(q):
    print('reader started with pid {}, parent pid {}'.format(os.getpid(), os.getppid()))
    for i in range(q.qsize()):
        time.sleep(1)
        print('reader got message {} from the queue'.format(q.get()))


def writer(q):
    print('writer started with pid {}, parent pid {}'.format(os.getpid(), os.getppid()))
    for i in 'abcdefg':
        q.put(i)


if __name__ == '__main__':
    p = Pool()
    q = Manager().Queue()  # shared queue
    p.apply_async(writer, args=(q, ))
    time.sleep(1)
    p.apply_async(reader, args=(q, ))
    p.close()
    p.join()
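# Note: empty() and qsize() on the shared queue are only snapshots, so this example relies on
# the sleep(1) to ensure the writer has filled the queue before the reader starts; a more
# robust variant would have the writer put a sentinel (e.g. None) and the reader loop on get().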
Beispiel #58
0
class SQLDB:

    PRAGMAS = """
        pragma journal_mode=WAL;
    """

    CREATE_CLAIM_TABLE = """
        create table if not exists claim (
            claim_hash bytes primary key,
            claim_id text not null,
            claim_name text not null,
            normalized text not null,
            txo_hash bytes not null,
            tx_position integer not null,
            amount integer not null,
            timestamp integer not null, -- last updated timestamp
            creation_timestamp integer not null,
            height integer not null, -- last updated height
            creation_height integer not null,
            activation_height integer,
            expiration_height integer not null,
            release_time integer not null,

            short_url text not null, -- normalized#shortest-unique-claim_id
            canonical_url text, -- channel's-short_url/normalized#shortest-unique-claim_id-within-channel

            title text,
            author text,
            description text,

            claim_type integer,
            reposted integer default 0,

            -- streams
            stream_type text,
            media_type text,
            fee_amount integer default 0,
            fee_currency text,
            duration integer,

            -- reposts
            reposted_claim_hash bytes,

            -- claims which are channels
            public_key_bytes bytes,
            public_key_hash bytes,
            claims_in_channel integer,

            -- claims which are inside channels
            channel_hash bytes,
            channel_join integer, -- height at which claim got valid signature / joined channel
            signature bytes,
            signature_digest bytes,
            signature_valid bool,

            effective_amount integer not null default 0,
            support_amount integer not null default 0,
            trending_group integer not null default 0,
            trending_mixed integer not null default 0,
            trending_local integer not null default 0,
            trending_global integer not null default 0
        );

        create index if not exists claim_normalized_idx on claim (normalized, activation_height);
        create index if not exists claim_channel_hash_idx on claim (channel_hash, signature, claim_hash);
        create index if not exists claim_claims_in_channel_idx on claim (signature_valid, channel_hash, normalized);
        create index if not exists claim_txo_hash_idx on claim (txo_hash);
        create index if not exists claim_activation_height_idx on claim (activation_height, claim_hash);
        create index if not exists claim_expiration_height_idx on claim (expiration_height);
        create index if not exists claim_reposted_claim_hash_idx on claim (reposted_claim_hash);
    """

    CREATE_SUPPORT_TABLE = """
        create table if not exists support (
            txo_hash bytes primary key,
            tx_position integer not null,
            height integer not null,
            claim_hash bytes not null,
            amount integer not null
        );
        create index if not exists support_claim_hash_idx on support (claim_hash, height);
    """

    CREATE_TAG_TABLE = """
        create table if not exists tag (
            tag text not null,
            claim_hash bytes not null,
            height integer not null
        );
        create unique index if not exists tag_claim_hash_tag_idx on tag (claim_hash, tag);
    """

    CREATE_CLAIMTRIE_TABLE = """
        create table if not exists claimtrie (
            normalized text primary key,
            claim_hash bytes not null,
            last_take_over_height integer not null
        );
        create index if not exists claimtrie_claim_hash_idx on claimtrie (claim_hash);
    """

    SEARCH_INDEXES = """
        -- used by any tag clouds
        create index if not exists tag_tag_idx on tag (tag, claim_hash);

        -- naked order bys (no filters)
        create unique index if not exists claim_release_idx on claim (release_time, claim_hash);
        create unique index if not exists claim_trending_idx on claim (trending_group, trending_mixed, claim_hash);
        create unique index if not exists claim_effective_amount_idx on claim (effective_amount, claim_hash);

        -- claim_type filter + order by
        create unique index if not exists claim_type_release_idx on claim (release_time, claim_type, claim_hash);
        create unique index if not exists claim_type_trending_idx on claim (trending_group, trending_mixed, claim_type, claim_hash);
        create unique index if not exists claim_type_effective_amount_idx on claim (effective_amount, claim_type, claim_hash);

        -- stream_type filter + order by
        create unique index if not exists stream_type_release_idx on claim (stream_type, release_time, claim_hash);
        create unique index if not exists stream_type_trending_idx on claim (stream_type, trending_group, trending_mixed, claim_hash);
        create unique index if not exists stream_type_effective_amount_idx on claim (stream_type, effective_amount, claim_hash);

        -- channel_hash filter + order by
        create unique index if not exists channel_hash_release_idx on claim (channel_hash, release_time, claim_hash);
        create unique index if not exists channel_hash_trending_idx on claim (channel_hash, trending_group, trending_mixed, claim_hash);
        create unique index if not exists channel_hash_effective_amount_idx on claim (channel_hash, effective_amount, claim_hash);

        -- duration filter + order by
        create unique index if not exists duration_release_idx on claim (duration, release_time, claim_hash);
        create unique index if not exists duration_trending_idx on claim (duration, trending_group, trending_mixed, claim_hash);
        create unique index if not exists duration_effective_amount_idx on claim (duration, effective_amount, claim_hash);

        -- fee_amount + order by
        create unique index if not exists fee_amount_release_idx on claim (fee_amount, release_time, claim_hash);
        create unique index if not exists fee_amount_trending_idx on claim (fee_amount, trending_group, trending_mixed, claim_hash);
        create unique index if not exists fee_amount_effective_amount_idx on claim (fee_amount, effective_amount, claim_hash);

        -- TODO: verify that all indexes below are used
        create index if not exists claim_height_normalized_idx on claim (height, normalized asc);
        create index if not exists claim_resolve_idx on claim (normalized, claim_id);
        create index if not exists claim_id_idx on claim (claim_id, claim_hash);
        create index if not exists claim_timestamp_idx on claim (timestamp);
        create index if not exists claim_public_key_hash_idx on claim (public_key_hash);
        create index if not exists claim_signature_valid_idx on claim (signature_valid);
    """

    TAG_INDEXES = '\n'.join(
        f"create unique index if not exists tag_{tag_key}_idx on tag (tag, claim_hash) WHERE tag='{tag_value}';"
        for tag_value, tag_key in COMMON_TAGS.items())

    CREATE_TABLES_QUERY = (CREATE_CLAIM_TABLE + CREATE_FULL_TEXT_SEARCH +
                           CREATE_SUPPORT_TABLE + CREATE_CLAIMTRIE_TABLE +
                           CREATE_TAG_TABLE)

    def __init__(self, main, path: str, blocking_channels: list,
                 filtering_channels: list, trending: list):
        self.main = main
        self._db_path = path
        self.db = None
        self.logger = class_logger(__name__, self.__class__.__name__)
        self.ledger = Ledger if main.coin.NET == 'mainnet' else RegTestLedger
        self._fts_synced = False
        self.state_manager = None
        self.blocked_streams = None
        self.blocked_channels = None
        self.blocking_channel_hashes = {
            unhexlify(channel_id)[::-1]
            for channel_id in blocking_channels if channel_id
        }
        self.filtered_streams = None
        self.filtered_channels = None
        self.filtering_channel_hashes = {
            unhexlify(channel_id)[::-1]
            for channel_id in filtering_channels if channel_id
        }
        self.trending = trending

    def open(self):
        self.db = apsw.Connection(
            self._db_path,
            flags=(apsw.SQLITE_OPEN_READWRITE | apsw.SQLITE_OPEN_CREATE
                   | apsw.SQLITE_OPEN_URI))

        def exec_factory(cursor, statement, bindings):
            tpl = namedtuple('row', (d[0] for d in cursor.getdescription()))
            cursor.setrowtrace(lambda cursor, row: tpl(*row))
            return True

        self.db.setexectrace(exec_factory)
        self.execute(self.PRAGMAS)
        self.execute(self.CREATE_TABLES_QUERY)
        register_canonical_functions(self.db)
        self.state_manager = Manager()
        self.blocked_streams = self.state_manager.dict()
        self.blocked_channels = self.state_manager.dict()
        self.filtered_streams = self.state_manager.dict()
        self.filtered_channels = self.state_manager.dict()
        self.update_blocked_and_filtered_claims()
        for algorithm in self.trending:
            algorithm.install(self.db)

    def close(self):
        if self.db is not None:
            self.db.close()
        if self.state_manager is not None:
            self.state_manager.shutdown()
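    # Note on open()/close() above: the Manager is created when the database is opened and shut
    # down on close; the blocked/filtered streams and channels are Manager dict proxies so they
    # can be shared with other processes without reopening the database.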

    def update_blocked_and_filtered_claims(self):
        self.update_claims_from_channel_hashes(self.blocked_streams,
                                               self.blocked_channels,
                                               self.blocking_channel_hashes)
        self.update_claims_from_channel_hashes(self.filtered_streams,
                                               self.filtered_channels,
                                               self.filtering_channel_hashes)
        self.filtered_streams.update(self.blocked_streams)
        self.filtered_channels.update(self.blocked_channels)

    def update_claims_from_channel_hashes(self, shared_streams,
                                          shared_channels, channel_hashes):
        streams, channels = {}, {}
        if channel_hashes:
            sql = query(
                "SELECT repost.channel_hash, repost.reposted_claim_hash, target.claim_type "
                "FROM claim as repost JOIN claim AS target ON (target.claim_hash=repost.reposted_claim_hash)",
                **{
                    'repost.reposted_claim_hash__is_not_null': 1,
                    'repost.channel_hash__in': channel_hashes
                })
            for blocked_claim in self.execute(*sql):
                if blocked_claim.claim_type == CLAIM_TYPES['stream']:
                    streams[blocked_claim.
                            reposted_claim_hash] = blocked_claim.channel_hash
                elif blocked_claim.claim_type == CLAIM_TYPES['channel']:
                    channels[blocked_claim.
                             reposted_claim_hash] = blocked_claim.channel_hash
        shared_streams.clear()
        shared_streams.update(streams)
        shared_channels.clear()
        shared_channels.update(channels)

    @staticmethod
    def _insert_sql(table: str, data: dict) -> Tuple[str, list]:
        columns, values = [], []
        for column, value in data.items():
            columns.append(column)
            values.append(value)
        sql = (f"INSERT INTO {table} ({', '.join(columns)}) "
               f"VALUES ({', '.join(['?'] * len(values))})")
        return sql, values

    @staticmethod
    def _update_sql(table: str, data: dict, where: str,
                    constraints: Union[list, tuple]) -> Tuple[str, list]:
        columns, values = [], []
        for column, value in data.items():
            columns.append(f"{column} = ?")
            values.append(value)
        values.extend(constraints)
        return f"UPDATE {table} SET {', '.join(columns)} WHERE {where}", values

    @staticmethod
    def _delete_sql(table: str, constraints: dict) -> Tuple[str, dict]:
        where, values = constraints_to_sql(constraints)
        return f"DELETE FROM {table} WHERE {where}", values

    def execute(self, *args):
        return self.db.cursor().execute(*args)

    def executemany(self, *args):
        return self.db.cursor().executemany(*args)

    def begin(self):
        self.execute('begin;')

    def commit(self):
        self.execute('commit;')

    def _upsertable_claims(self,
                           txos: List[Output],
                           header,
                           clear_first=False):
        claim_hashes, claims, tags = set(), [], {}
        for txo in txos:
            tx = txo.tx_ref.tx

            try:
                assert txo.claim_name
                assert txo.normalized_name
            except:
                #self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
                continue

            claim_hash = txo.claim_hash
            claim_hashes.add(claim_hash)
            claim_record = {
                'claim_hash': claim_hash,
                'claim_id': txo.claim_id,
                'claim_name': txo.claim_name,
                'normalized': txo.normalized_name,
                'txo_hash': txo.ref.hash,
                'tx_position': tx.position,
                'amount': txo.amount,
                'timestamp': header['timestamp'],
                'height': tx.height,
                'title': None,
                'description': None,
                'author': None,
                'duration': None,
                'claim_type': None,
                'stream_type': None,
                'media_type': None,
                'release_time': None,
                'fee_currency': None,
                'fee_amount': 0,
                'reposted_claim_hash': None
            }
            claims.append(claim_record)

            try:
                claim = txo.claim
            except:
                #self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
                continue

            if claim.is_stream:
                claim_record['claim_type'] = CLAIM_TYPES['stream']
                claim_record['media_type'] = claim.stream.source.media_type
                claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(
                    claim_record['media_type'])]
                claim_record['title'] = claim.stream.title
                claim_record['description'] = claim.stream.description
                claim_record['author'] = claim.stream.author
                if claim.stream.video and claim.stream.video.duration:
                    claim_record['duration'] = claim.stream.video.duration
                if claim.stream.audio and claim.stream.audio.duration:
                    claim_record['duration'] = claim.stream.audio.duration
                if claim.stream.release_time:
                    claim_record['release_time'] = claim.stream.release_time
                if claim.stream.has_fee:
                    fee = claim.stream.fee
                    if isinstance(fee.currency, str):
                        claim_record['fee_currency'] = fee.currency.lower()
                    if isinstance(fee.amount, Decimal):
                        claim_record['fee_amount'] = int(fee.amount * 1000)
            elif claim.is_repost:
                claim_record['claim_type'] = CLAIM_TYPES['repost']
                claim_record[
                    'reposted_claim_hash'] = claim.repost.reference.claim_hash
            elif claim.is_channel:
                claim_record['claim_type'] = CLAIM_TYPES['channel']

            for tag in clean_tags(claim.message.tags):
                tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

        if clear_first:
            self._clear_claim_metadata(claim_hashes)

        if tags:
            self.executemany(
                "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)",
                tags.values())

        return claims

    def insert_claims(self, txos: List[Output], header):
        claims = self._upsertable_claims(txos, header)
        if claims:
            self.executemany(
                """
                INSERT OR IGNORE INTO claim (
                    claim_hash, claim_id, claim_name, normalized, txo_hash, tx_position, amount,
                    claim_type, media_type, stream_type, timestamp, creation_timestamp,
                    fee_currency, fee_amount, title, description, author, duration, height, reposted_claim_hash,
                    creation_height, release_time, activation_height, expiration_height, short_url)
                VALUES (
                    :claim_hash, :claim_id, :claim_name, :normalized, :txo_hash, :tx_position, :amount,
                    :claim_type, :media_type, :stream_type, :timestamp, :timestamp,
                    :fee_currency, :fee_amount, :title, :description, :author, :duration, :height, :reposted_claim_hash, :height,
                    CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE :timestamp END,
                    CASE WHEN :normalized NOT IN (SELECT normalized FROM claimtrie) THEN :height END,
                    CASE WHEN :height >= 137181 THEN :height+2102400 ELSE :height+262974 END,
                    :claim_name||COALESCE(
                        (SELECT shortest_id(claim_id, :claim_id) FROM claim WHERE normalized = :normalized),
                        '#'||substr(:claim_id, 1, 1)
                    )
                )""", claims)

    def update_claims(self, txos: List[Output], header):
        claims = self._upsertable_claims(txos, header, clear_first=True)
        if claims:
            self.executemany(
                """
                UPDATE claim SET
                    txo_hash=:txo_hash, tx_position=:tx_position, amount=:amount, height=:height,
                    claim_type=:claim_type, media_type=:media_type, stream_type=:stream_type,
                    timestamp=:timestamp, fee_amount=:fee_amount, fee_currency=:fee_currency,
                    title=:title, duration=:duration, description=:description, author=:author, reposted_claim_hash=:reposted_claim_hash,
                    release_time=CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE release_time END
                WHERE claim_hash=:claim_hash;
                """, claims)

    def delete_claims(self, claim_hashes: Set[bytes]):
        """ Deletes claim supports and from claimtrie in case of an abandon. """
        if claim_hashes:
            affected_channels = self.execute(
                *query("SELECT channel_hash FROM claim",
                       channel_hash__is_not_null=1,
                       claim_hash__in=claim_hashes)).fetchall()
            for table in ('claim', 'support', 'claimtrie'):
                self.execute(
                    *self._delete_sql(table, {'claim_hash__in': claim_hashes}))
            self._clear_claim_metadata(claim_hashes)
            return {r.channel_hash for r in affected_channels}
        return set()

    def delete_claims_above_height(self, height: int):
        claim_hashes = [
            x[0] for x in self.execute(
                "SELECT claim_hash FROM claim WHERE height>?", (
                    height, )).fetchall()
        ]
        while claim_hashes:
            batch = set(claim_hashes[:500])
            claim_hashes = claim_hashes[500:]
            self.delete_claims(batch)

    def _clear_claim_metadata(self, claim_hashes: Set[bytes]):
        if claim_hashes:
            for table in ('tag', ):  # 'language', 'location', etc
                self.execute(
                    *self._delete_sql(table, {'claim_hash__in': claim_hashes}))

    def split_inputs_into_claims_supports_and_other(self, txis):
        txo_hashes = {txi.txo_ref.hash for txi in txis}
        claims = self.execute(
            *query("SELECT txo_hash, claim_hash, normalized FROM claim",
                   txo_hash__in=txo_hashes)).fetchall()
        txo_hashes -= {r.txo_hash for r in claims}
        supports = {}
        if txo_hashes:
            supports = self.execute(
                *query("SELECT txo_hash, claim_hash FROM support",
                       txo_hash__in=txo_hashes)).fetchall()
            txo_hashes -= {r.txo_hash for r in supports}
        return claims, supports, txo_hashes

    def insert_supports(self, txos: List[Output]):
        supports = []
        for txo in txos:
            tx = txo.tx_ref.tx
            supports.append((txo.ref.hash, tx.position, tx.height,
                             txo.claim_hash, txo.amount))
        if supports:
            self.executemany(
                "INSERT OR IGNORE INTO support ("
                "   txo_hash, tx_position, height, claim_hash, amount"
                ") "
                "VALUES (?, ?, ?, ?, ?)", supports)

    def delete_supports(self, txo_hashes: Set[bytes]):
        if txo_hashes:
            self.execute(
                *self._delete_sql('support', {'txo_hash__in': txo_hashes}))

    def calculate_reposts(self, txos: List[Output]):
        targets = set()
        for txo in txos:
            try:
                claim = txo.claim
            except:
                continue
            if claim.is_repost:
                targets.add((claim.repost.reference.claim_hash, ))
        if targets:
            self.executemany(
                """
                UPDATE claim SET reposted = (
                    SELECT count(*) FROM claim AS repost WHERE repost.reposted_claim_hash = claim.claim_hash
                )
                WHERE claim_hash = ?
                """, targets)

    def validate_channel_signatures(self, height, new_claims, updated_claims,
                                    spent_claims, affected_channels, timer):
        if not new_claims and not updated_claims and not spent_claims:
            return

        sub_timer = timer.add_timer('segregate channels and signables')
        sub_timer.start()
        channels, new_channel_keys, signables = {}, {}, {}
        for txo in chain(new_claims, updated_claims):
            try:
                claim = txo.claim
            except:
                continue
            if claim.is_channel:
                channels[txo.claim_hash] = txo
                new_channel_keys[
                    txo.claim_hash] = claim.channel.public_key_bytes
            else:
                signables[txo.claim_hash] = txo
        sub_timer.stop()

        sub_timer = timer.add_timer('make list of channels we need to lookup')
        sub_timer.start()
        missing_channel_keys = set()
        for txo in signables.values():
            claim = txo.claim
            if claim.is_signed and claim.signing_channel_hash not in new_channel_keys:
                missing_channel_keys.add(claim.signing_channel_hash)
        sub_timer.stop()

        sub_timer = timer.add_timer('lookup missing channels')
        sub_timer.start()
        all_channel_keys = {}
        if new_channel_keys or missing_channel_keys or affected_channels:
            all_channel_keys = dict(
                self.execute(
                    *query("SELECT claim_hash, public_key_bytes FROM claim",
                           claim_hash__in=set(new_channel_keys)
                           | missing_channel_keys | affected_channels)))
        sub_timer.stop()

        sub_timer = timer.add_timer('prepare for updating claims')
        sub_timer.start()
        changed_channel_keys = {}
        for claim_hash, new_key in new_channel_keys.items():
            if claim_hash not in all_channel_keys or all_channel_keys[
                    claim_hash] != new_key:
                all_channel_keys[claim_hash] = new_key
                changed_channel_keys[claim_hash] = new_key

        claim_updates = []

        for claim_hash, txo in signables.items():
            claim = txo.claim
            update = {
                'claim_hash': claim_hash,
                'channel_hash': None,
                'signature': None,
                'signature_digest': None,
                'signature_valid': None
            }
            if claim.is_signed:
                update.update({
                    'channel_hash':
                    claim.signing_channel_hash,
                    'signature':
                    txo.get_encoded_signature(),
                    'signature_digest':
                    txo.get_signature_digest(self.ledger),
                    'signature_valid':
                    0
                })
            claim_updates.append(update)
        sub_timer.stop()

        sub_timer = timer.add_timer(
            'find claims affected by a change in channel key')
        sub_timer.start()
        if changed_channel_keys:
            sql = f"""
            SELECT * FROM claim WHERE
                channel_hash IN ({','.join('?' for _ in changed_channel_keys)}) AND
                signature IS NOT NULL
            """
            for affected_claim in self.execute(sql,
                                               changed_channel_keys.keys()):
                if affected_claim.claim_hash not in signables:
                    claim_updates.append({
                        'claim_hash': affected_claim.claim_hash,
                        'channel_hash': affected_claim.channel_hash,
                        'signature': affected_claim.signature,
                        'signature_digest': affected_claim.signature_digest,
                        'signature_valid': 0
                    })
        sub_timer.stop()

        sub_timer = timer.add_timer('verify signatures')
        sub_timer.start()
        for update in claim_updates:
            channel_pub_key = all_channel_keys.get(update['channel_hash'])
            if channel_pub_key and update['signature']:
                update['signature_valid'] = Output.is_signature_valid(
                    bytes(update['signature']),
                    bytes(update['signature_digest']), channel_pub_key)
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims')
        sub_timer.start()
        if claim_updates:
            self.executemany(
                f"""
                UPDATE claim SET 
                    channel_hash=:channel_hash, signature=:signature, signature_digest=:signature_digest,
                    signature_valid=:signature_valid,
                    channel_join=CASE
                        WHEN signature_valid=1 AND :signature_valid=1 AND channel_hash=:channel_hash THEN channel_join
                        WHEN :signature_valid=1 THEN {height}
                    END,
                    canonical_url=CASE
                        WHEN signature_valid=1 AND :signature_valid=1 AND channel_hash=:channel_hash THEN canonical_url
                        WHEN :signature_valid=1 THEN
                            (SELECT short_url FROM claim WHERE claim_hash=:channel_hash)||'/'||
                            claim_name||COALESCE(
                                (SELECT shortest_id(other_claim.claim_id, claim.claim_id) FROM claim AS other_claim
                                 WHERE other_claim.signature_valid = 1 AND
                                       other_claim.channel_hash = :channel_hash AND
                                       other_claim.normalized = claim.normalized),
                                '#'||substr(claim_id, 1, 1)
                            )
                    END
                WHERE claim_hash=:claim_hash;
                """, claim_updates)
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims affected by spent channels')
        sub_timer.start()
        if spent_claims:
            self.execute(
                f"""
                UPDATE claim SET
                    signature_valid=CASE WHEN signature IS NOT NULL THEN 0 END,
                    channel_join=NULL, canonical_url=NULL
                WHERE channel_hash IN ({','.join('?' for _ in spent_claims)})
                """, spent_claims)
        sub_timer.stop()

        sub_timer = timer.add_timer('update channels')
        sub_timer.start()
        if channels:
            self.executemany(
                """
                UPDATE claim SET
                    public_key_bytes=:public_key_bytes,
                    public_key_hash=:public_key_hash
                WHERE claim_hash=:claim_hash""", [{
                    'claim_hash':
                    claim_hash,
                    'public_key_bytes':
                    txo.claim.channel.public_key_bytes,
                    'public_key_hash':
                    self.ledger.address_to_hash160(
                        self.ledger.public_key_to_address(
                            txo.claim.channel.public_key_bytes))
                } for claim_hash, txo in channels.items()])
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims_in_channel counts')
        sub_timer.start()
        if all_channel_keys:
            self.executemany(
                f"""
                UPDATE claim SET
                    claims_in_channel=(
                        SELECT COUNT(*) FROM claim AS claim_in_channel
                        WHERE claim_in_channel.signature_valid=1 AND
                              claim_in_channel.channel_hash=claim.claim_hash
                    )
                WHERE claim_hash = ?
            """,
                [(channel_hash, ) for channel_hash in all_channel_keys.keys()])
        sub_timer.stop()

        sub_timer = timer.add_timer('update blocked claims list')
        sub_timer.start()
        if (self.blocking_channel_hashes.intersection(all_channel_keys) or
                self.filtering_channel_hashes.intersection(all_channel_keys)):
            self.update_blocked_and_filtered_claims()
        sub_timer.stop()

    def _update_support_amount(self, claim_hashes):
        if claim_hashes:
            self.execute(
                f"""
                UPDATE claim SET
                    support_amount = COALESCE(
                        (SELECT SUM(amount) FROM support WHERE support.claim_hash=claim.claim_hash), 0
                    )
                WHERE claim_hash IN ({','.join('?' for _ in claim_hashes)})
            """, claim_hashes)

    def _update_effective_amount(self, height, claim_hashes=None):
        self.execute(
            f"UPDATE claim SET effective_amount = amount + support_amount "
            f"WHERE activation_height = {height}")
        if claim_hashes:
            self.execute(
                f"UPDATE claim SET effective_amount = amount + support_amount "
                f"WHERE activation_height < {height} "
                f"  AND claim_hash IN ({','.join('?' for _ in claim_hashes)})",
                claim_hashes)

    def _calculate_activation_height(self, height):
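        # Pending claims activate after a delay of (height - last takeover
        # height) / 32 blocks, capped at 4032 blocks; names without a recorded
        # takeover fall back to the current height, i.e. no delay.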
        last_take_over_height = f"""COALESCE(
            (SELECT last_take_over_height FROM claimtrie
            WHERE claimtrie.normalized=claim.normalized),
            {height}
        )
        """
        self.execute(f"""
            UPDATE claim SET activation_height = 
                {height} + min(4032, cast(({height} - {last_take_over_height}) / 32 AS INT))
            WHERE activation_height IS NULL
        """)

    def _perform_overtake(self, height, changed_claim_hashes, deleted_names):
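        # For every name affected at this height, find the claim with the
        # highest effective amount and compare it with the current claimtrie
        # winner; when the winner changes, record the takeover and accelerate
        # activation of any still-pending claims for that name to this height.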
        deleted_names_sql = claim_hashes_sql = ""
        if changed_claim_hashes:
            claim_hashes_sql = f"OR claim_hash IN ({','.join('?' for _ in changed_claim_hashes)})"
        if deleted_names:
            deleted_names_sql = f"OR normalized IN ({','.join('?' for _ in deleted_names)})"
        overtakes = self.execute(
            f"""
            SELECT winner.normalized, winner.claim_hash,
                   claimtrie.claim_hash AS current_winner,
                   MAX(winner.effective_amount) AS max_winner_effective_amount
            FROM (
                SELECT normalized, claim_hash, effective_amount FROM claim
                WHERE normalized IN (
                    SELECT normalized FROM claim WHERE activation_height={height} {claim_hashes_sql}
                ) {deleted_names_sql}
                ORDER BY effective_amount DESC, height ASC, tx_position ASC
            ) AS winner LEFT JOIN claimtrie USING (normalized)
            GROUP BY winner.normalized
            HAVING current_winner IS NULL OR current_winner <> winner.claim_hash
        """,
            list(changed_claim_hashes) + deleted_names)
        for overtake in overtakes:
            if overtake.current_winner:
                self.execute(
                    f"UPDATE claimtrie SET claim_hash = ?, last_take_over_height = {height} "
                    f"WHERE normalized = ?",
                    (overtake.claim_hash, overtake.normalized))
            else:
                self.execute(
                    f"INSERT INTO claimtrie (claim_hash, normalized, last_take_over_height) "
                    f"VALUES (?, ?, {height})",
                    (overtake.claim_hash, overtake.normalized))
            self.execute(
                f"UPDATE claim SET activation_height = {height} WHERE normalized = ? "
                f"AND (activation_height IS NULL OR activation_height > {height})",
                (overtake.normalized, ))

    def _copy(self, height):
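        # Snapshot the claimtrie at this height, keeping a rolling window of
        # the 50 most recent snapshots.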
        if height > 50:
            self.execute(f"DROP TABLE claimtrie{height-50}")
        self.execute(
            f"CREATE TABLE claimtrie{height} AS SELECT * FROM claimtrie")

    def update_claimtrie(self, height, changed_claim_hashes, deleted_names,
                         timer):
        r = timer.run

        r(self._calculate_activation_height, height)
        r(self._update_support_amount, changed_claim_hashes)

        r(self._update_effective_amount, height, changed_claim_hashes)
        r(self._perform_overtake, height, changed_claim_hashes,
          list(deleted_names))

        r(self._update_effective_amount, height)
        r(self._perform_overtake, height, [], [])

    def get_expiring(self, height):
        return self.execute(
            f"SELECT claim_hash, normalized FROM claim WHERE expiration_height = {height}"
        )

    def advance_txs(self, height, all_txs, header, daemon_height, timer):
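        # First pass: walk the block's transactions and collect which claims
        # and supports are inserted, updated or deleted. Second pass: apply
        # those changes, revalidate channel signatures, update the claimtrie
        # and refresh full-text search and trending data.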
        insert_claims = []
        update_claims = []
        update_claim_hashes = set()
        delete_claim_hashes = set()
        insert_supports = []
        delete_support_txo_hashes = set()
        recalculate_claim_hashes = set()  # added/deleted supports, added/updated claim
        deleted_claim_names = set()
        delete_others = set()
        body_timer = timer.add_timer('body')
        for position, (etx, txid) in enumerate(all_txs):
            tx = timer.run(Transaction,
                           etx.raw,
                           height=height,
                           position=position)
            # Inputs
            spent_claims, spent_supports, spent_others = timer.run(
                self.split_inputs_into_claims_supports_and_other, tx.inputs)
            body_timer.start()
            delete_claim_hashes.update({r.claim_hash for r in spent_claims})
            deleted_claim_names.update({r.normalized for r in spent_claims})
            delete_support_txo_hashes.update({r.txo_hash for r in spent_supports})
            recalculate_claim_hashes.update({r.claim_hash for r in spent_supports})
            delete_others.update(spent_others)
            # Outputs
            for output in tx.outputs:
                if output.is_support:
                    insert_supports.append(output)
                    recalculate_claim_hashes.add(output.claim_hash)
                elif output.script.is_claim_name:
                    insert_claims.append(output)
                    recalculate_claim_hashes.add(output.claim_hash)
                elif output.script.is_update_claim:
                    claim_hash = output.claim_hash
                    update_claims.append(output)
                    recalculate_claim_hashes.add(claim_hash)
            body_timer.stop()

        skip_update_claim_timer = timer.add_timer(
            'skip update of abandoned claims')
        skip_update_claim_timer.start()
        for updated_claim in list(update_claims):
            if updated_claim.ref.hash in delete_others:
                update_claims.remove(updated_claim)
        for updated_claim in update_claims:
            claim_hash = updated_claim.claim_hash
            delete_claim_hashes.discard(claim_hash)
            update_claim_hashes.add(claim_hash)
        skip_update_claim_timer.stop()

        skip_insert_claim_timer = timer.add_timer(
            'skip insertion of abandoned claims')
        skip_insert_claim_timer.start()
        for new_claim in list(insert_claims):
            if new_claim.ref.hash in delete_others:
                if new_claim.claim_hash not in update_claim_hashes:
                    insert_claims.remove(new_claim)
        skip_insert_claim_timer.stop()

        skip_insert_support_timer = timer.add_timer(
            'skip insertion of abandoned supports')
        skip_insert_support_timer.start()
        for new_support in list(insert_supports):
            if new_support.ref.hash in delete_others:
                insert_supports.remove(new_support)
        skip_insert_support_timer.stop()

        expire_timer = timer.add_timer('recording expired claims')
        expire_timer.start()
        for expired in self.get_expiring(height):
            delete_claim_hashes.add(expired.claim_hash)
            deleted_claim_names.add(expired.normalized)
        expire_timer.stop()

        r = timer.run
        r(update_full_text_search, 'before-delete', delete_claim_hashes,
          self.db.cursor(), self.main.first_sync)
        affected_channels = r(self.delete_claims, delete_claim_hashes)
        r(self.delete_supports, delete_support_txo_hashes)
        r(self.insert_claims, insert_claims, header)
        r(self.calculate_reposts, insert_claims)
        r(update_full_text_search,
          'after-insert', [txo.claim_hash for txo in insert_claims],
          self.db.cursor(), self.main.first_sync)
        r(update_full_text_search,
          'before-update', [txo.claim_hash for txo in update_claims],
          self.db.cursor(), self.main.first_sync)
        r(self.update_claims, update_claims, header)
        r(update_full_text_search,
          'after-update', [txo.claim_hash for txo in update_claims],
          self.db.cursor(), self.main.first_sync)
        r(self.validate_channel_signatures,
          height,
          insert_claims,
          update_claims,
          delete_claim_hashes,
          affected_channels,
          forward_timer=True)
        r(self.insert_supports, insert_supports)
        r(self.update_claimtrie,
          height,
          recalculate_claim_hashes,
          deleted_claim_names,
          forward_timer=True)
        for algorithm in self.trending:
            r(algorithm.run, self.db.cursor(), height, daemon_height,
              recalculate_claim_hashes)
        if not self._fts_synced and self.main.first_sync and height == daemon_height:
            r(first_sync_finished, self.db.cursor())
            self._fts_synced = True
Beispiel #59
class DataManager:
    """
    Class to interact with the Data visualizer
    @author Frederic Abraham
    """
    def __init__(self,
                 display_data: dict,
                 parallel: bool = False,
                 visualize: bool = True):

        self.display_data = display_data

        data_names = [
            entry['display_name'] for entry in display_data.values()
            if entry['graph']
        ]

        self.parallel = parallel
        self.visualize = visualize
        if self.visualize:
            if self.parallel:
                self.manager = Manager()
                self.done = self.manager.Value("done", True)
                self.time_step = self.manager.Value("timestep", 0)
                self.line_dict = self.manager.dict(
                    {data_name: 0 for data_name in data_names})

                self.p = Process(target=run,
                                 args=(
                                     self.done,
                                     self.time_step,
                                     self.line_dict,
                                 ))
                self.p.start()
            else:
                plt.tight_layout()
                plt.ion()
                plt.show()

        self.time_steps = []
        self.data = {data_name: [] for data_name in display_data.keys()}
        self.colors = plt.get_cmap('plasma')(np.linspace(
            0, 0.8, len(self.data)))

    def update_time_step(self, new_time_step):
        self.time_steps.append(new_time_step)
        self.display_data['generation']['value'] = new_time_step
        if self.visualize:
            if self.parallel:
                self.time_step.value = new_time_step

    def update_value(self, key, value):
        self.data[key].append(value)
        self.display_data[key]['value'] = value
        if self.visualize:
            if self.parallel and self.display_data[key]['graph']:
                self.line_dict[self.display_data[key]['display_name']] = value

    def get_data(self, key):
        return self.data[key]

    def update(self):
        if not self.parallel and self.visualize:
            animate(
                self.time_steps,
                {key: series for key, series in self.data.items()
                 if self.display_data[key]['graph']},
                self.colors)

    def stop(self):
        if self.visualize and self.parallel:
            self.done.value = False
            self.p.join()
            self.p.close()
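
# A minimal usage sketch for DataManager (the display_data layout and metric
# names below are hypothetical, inferred from the methods above; assumes
# matplotlib/numpy and the run/animate helpers from the original module are
# importable):
#
#   display_data = {
#       'generation': {'display_name': 'Generation', 'graph': False, 'value': 0},
#       'fitness': {'display_name': 'Best fitness', 'graph': True, 'value': 0.0},
#   }
#   dm = DataManager(display_data, parallel=False, visualize=False)
#   for generation in range(10):
#       dm.update_time_step(generation)
#       dm.update_value('fitness', generation * 1.5)
#       dm.update()  # draws only when visualize=True
#   dm.stop()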
Beispiel #60
def main(argv):
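    # Load Telegram credentials, start the tgApi worker thread, then serve a
    # selectors-based TCP socket that relays incoming messages to Telegram
    # and echoes them back to the sender.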

    with open(os.path.join(clientDir, 'config', 'login.json')) as login_file:
        login = json.load(login_file)
    # initialize thread channels
    thcmds = {}
    thcmds['tgApi'] = Queue()
    thcmds['bot'] = Queue()
    ths = {}
    ths['tgApi'] = tgApi(
        cmdQueue=thcmds,
        token=login['telegram']['token'],
        botname=login['telegram']['botname'],
        authgroup=login['telegram']['authgroup'],
        itag="tgApi",
        otag="bot",
    )
    print(
        'telegram:',
        login['telegram']['token'],
        login['telegram']['botname'],
        login['telegram']['authgroup'],
    )
    # start the worker threads as daemons
    for key, th in ths.items():
        th.daemon = True
        th.start()
    # initialize process channels
    manager = Manager()
    smpCmds = {}
    rmpCmds = {}
    impReqs = {}
    mps = {}
    is_on = {}
    is_auto = {}

    thcmds['tgApi'].put('client starts')
    pkey = 'bot'

    # socket
    HOST = login['server']['host']
    PORT = login['server']['port']
    print(
        'MT Communication @',
        HOST,
        PORT,
    )

    sel = selectors.DefaultSelector()
    lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    lsock.bind((HOST, PORT))
    lsock.listen()
    print('listening on', (HOST, PORT))
    lsock.setblocking(False)
    sel.register(lsock, selectors.EVENT_READ, data=None)
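    # Event loop: an event on the listening socket (key.data is None) means a
    # new client to accept; otherwise read from the client, forward the bytes
    # to the Telegram queue and echo them back.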
    while True:
        events = sel.select(timeout=None)
        for key, mask in events:
            if key.data is None:
                # accept_wrapper(key.fileobj)
                sock = key.fileobj
                conn, addr = sock.accept()  # Should be ready to read
                print('accepted connection from', addr)
                conn.setblocking(False)
                data = types.SimpleNamespace(addr=addr, inb=b'', outb=b'')
                events = selectors.EVENT_READ | selectors.EVENT_WRITE
                sel.register(conn, events, data=data)
            else:
                # service_connection(key, mask)
                sock = key.fileobj
                data = key.data
                if mask & selectors.EVENT_READ:
                    recv_data = sock.recv(1024)  # Should be ready to read
                    if recv_data:
                        data.outb += recv_data
                    else:
                        print('closing connection to', data.addr)
                        sel.unregister(sock)
                        sock.close()
                if mask & selectors.EVENT_WRITE:
                    if data.outb:
                        try:
                            thcmds['tgApi'].put(data.outb.decode())
                        except Exception as e:
                            print(e)
                        print('echoing', repr(data.outb), 'to', data.addr)
                        sent = sock.send(data.outb)  # Should be ready to write
                        data.outb = data.outb[sent:]