def clean(self):
     threadPool = ThreadPool(self.threadCount)
     args = []
     for t in range(self.topicNum):
         args.append((self.connectorName, self.topics[t], self.partitionNum))
     threadPool.starmap(self.driver.cleanTableStagePipe, args)
     threadPool.close()
     threadPool.join()
Example #2
 def upload_for_hosts(self, hosts, files, remote_path):
     files_for_hosts = []
     for host in hosts:
         files_for_hosts.append([host, files, remote_path])
     pool = ThreadPool(self.threads_num)
     pool.starmap(self.upload_on_host, files_for_hosts)
     pool.close()
     pool.join()
Example #3
 def main(self, threads):
     from multiprocessing.dummy import Pool
     self.load(base_path)
     self.load_proxies(proxies_path, p_type)
     self.threads = threads
     pool = Pool(self.threads)
     pool.starmap(self.login,
                  zip(self.acc_array, itertools.cycle(self.proxies)))
Example #4
    def start(self):
        pool = ThreadPool(self.PROFILES_LOADER_POOL_SIZE)

        pool.starmap(self._do_search,
                     self._get_search_iterator())

        pool.close()
        pool.join()
Example #5
def download_media(media_set, session, directory, board_name):
    def download(thread, session, directory):
        directory = thread["download_path"] + "/"
        valid = False
        for post in thread["posts"]:
            name_key = "originalName"
            for media in post["files"]:
                filename = re.sub(r'[\\/*?:"<>|]', '', media[name_key])
                ext = media["ext"]
                alt_name = media["alt_filename"]
                link = "https://bbw-chan.nl" + media["path"]
                download_path = directory + filename
                count_string = len(download_path)
                lp = are_long_paths_enabled()
                if not lp:
                    if count_string > maximum_length:
                        num_sum = count_string - maximum_length
                        name_key = "alt_filename"
                        download_path = directory + alt_name + "." + ext

                og_filename = os.path.splitext(filename)[0]
                # Check for dupe here
                r = session.get(link, stream=True)
                if r.status_code != 404:
                    if not os.path.exists(os.path.dirname(download_path)):
                        os.makedirs(os.path.dirname(download_path))
                    with open(download_path, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                    logger.info("Link: {}".format(link))
                    logger.info("Path: {}".format(download_path))
                    valid = True
                else:
                    logger.info("Fail (Link): {}".format(link))
                    logger.info("Fail (Path): {}".format(download_path))
        if valid:
            os.makedirs(directory, exist_ok=True)
            with open(directory + 'archive.json', 'w') as outfile:
                json.dump(thread, outfile)
            return thread
        else:
            return

    print("Download Processing")
    print("Name: " + board_name)
    print("Directory: " + directory)
    # print("Downloading "+post_count+" "+location)
    max_threads = len(media_set)
    if multithreading:
        pool = ThreadPool(max_threads)
    else:
        pool = ThreadPool(1)
    session.mount(
        'https://',
        requests.adapters.HTTPAdapter(pool_connections=4,
                                      pool_maxsize=max_threads))
    pool.starmap(download, product(media_set, [session], [directory]))
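
The product(media_set, [session], [directory]) call above is the fan-out idiom several of these examples rely on: every element of media_set is paired with the single shared session and directory, and starmap unpacks each resulting tuple into the worker's positional arguments. A minimal, self-contained sketch of that idiom (the worker and its values below are illustrative, not part of the scraper above):

from itertools import product
from multiprocessing.dummy import Pool as ThreadPool

def fetch(item, session, directory):
    # each call receives one item plus the two shared arguments
    return "%s downloaded to %s via %s" % (item, directory, session)

pool = ThreadPool(2)
results = pool.starmap(fetch, product(["a", "b", "c"], ["shared-session"], ["downloads/"]))
pool.close()
pool.join()
print(results)
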
Example #6
def download_media(media_set, session, directory, username, post_count, location):
    def download(media, session, directory, username):
        count = 0
        while count < 11:
            link = media["link"]
            r = json_request(session, link, "HEAD", True, False)
            if not r:
                return False

            header = r.headers
            content_length = int(header["content-length"])
            date_object = datetime.strptime(
                media["postedAt"], "%d-%m-%Y %H:%M:%S")
            og_filename = media["filename"]
            media["ext"] = os.path.splitext(og_filename)[1]
            media["ext"] = media["ext"].replace(".", "")
            download_path = media["directory"]+media["filename"]
            timestamp = date_object.timestamp()
            if not overwrite_files:
                if check_for_dupe_file(download_path, content_length):
                    return
            r = json_request(session, link, "GET", True, False)
            if not r:
                return False
            delete = False
            try:
                with open(download_path, 'wb') as f:
                    delete = True
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
            except (ConnectionResetError) as e:
                if delete:
                    os.unlink(download_path)
                log_error.exception(e)
                count += 1
                continue
            except Exception as e:
                if delete:
                    os.unlink(download_path)
                log_error.exception(str(e) + "\n Tries: "+str(count))
                count += 1
                # input("Enter to continue")
                continue
            format_image(download_path, timestamp)
            log_download.info("Link: {}".format(link))
            log_download.info("Path: {}".format(download_path))
            return True
    print("Download Processing")
    print("Name: "+username+" | Directory: " + directory)
    print("Downloading "+str(len(media_set))+" "+location+"\n")
    if multithreading:
        pool = ThreadPool()
    else:
        pool = ThreadPool(1)
    pool.starmap(download, product(
        media_set, [session], [directory], [username]))
Example #7
def generate_statistics(files, output_path_general, output_path_letters_punc):
    """
   входная точка в утилиту, которые формирует статискику для всех файлов

   Parameters:

   files: список файлов, подаваемых на вход утилите для дальнейшей обработки
   output_path_general: имя файла, где будет располагаться результат работы утилиты по общим характеристикам
   output_path_letters_punc: имя файла, где будет распологаться результат работы утилиты по отдельным буквам и знакам
   """

    global count
    count = len(files)

    open(output_path_general, 'w').close()
    open(output_path_letters_punc, 'w').close()

    # Iterate over the list of files and write each file's statistics as a row in the output file
    pool = ThreadPool(5)
    results = pool.starmap(generate_statistic, zip(files))
    results_az_punc = pool.starmap(generate_statistic_az_punc, zip(files))
    pool.close()
    pool.join()

    file_result = []
    file_result_az_punc = []

    for result in results_az_punc:
        try:
            file_result_az_punc.append(result)
        except Exception as e:
            print(e)
            print(traceback.format_exc())

    for result in results:
        try:
            result_list = []
            for f in ordered_features:
                result_list.append(str(result[f]))
            file_result.append(result_list)
        except Exception as e:
            print(e)
            print(traceback.format_exc())

    # define the CSV layout for the general-characteristics statistics
    table_general = pd.DataFrame(file_result, index=files,
                                 columns=["number_of_alphabets",
                                          "number_of_characters",
                                          "number_of_words",
                                          "number_of_sentence",
                                          "average_sentence_length_by_character",
                                          "average_sentence_length_by_word",
                                          "average_word_length"
                                          ])
    table_general.to_csv(output_path_general, header=True, index=True)
    table_number_of_alphabets_az = pd.DataFrame(file_result_az_punc, index=files, columns=ordered_features_az_punc)
    table_number_of_alphabets_az.to_csv(output_path_letters_punc, header=True, index=True)
Example #8
    def loop(self):
        while True:
            array = []
            for id in self.monitors:
                array.append((id, self.imageFolder))

            pool = ThreadPool(len(self.monitors))
            pool.starmap(self.bgTransition, array)
            # release the pool before sleeping so a new one is not leaked on every iteration
            pool.close()
            pool.join()
            time.sleep(self.timeout)
Example #9
 def process_testing_data(self):
     if self.testing_dir:
         pool = ThreadPool(self.NUM_THREADS)
         self.finished_loading = 0
         args = get_pool_args(self.testing_data, self.testing_labels,
                              self.testing_dir, self.num_classes)
         pool.starmap(self.process_symbol_directory, args)
         self.testing_labels = categorize_labels(
             self.testing_labels, self.num_classes)
         print("Finished loading testing data")
Example #10
def main():
    input_file = './input.txt'
    output_file = './output.txt'

    pool = Pool(1)
    lock = Lock()
    pool.starmap(process_chunk, [ ( input_file, output_file, chunk_start, chunk_end, lock ) for chunk_start, chunk_end in chunkify(input_file, 10) ])

    pool.close()
    pool.join()
Example #11
def main(profile=None, dryrun=True):
  # AWS Credentials
  # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
  session = boto3.Session(profile_name=profile)
  ec2 = session.client('ec2', region_name='us-east-1')
  regions = get_regions(ec2)
  if dryrun: print("Dryrun, not actually deleting anything")
  pool = ThreadPool(len(regions))
  pool.starmap(delete_everything_in_region, zip(itertools.repeat(ec2), itertools.repeat(session), regions, itertools.repeat(dryrun)))
  pool.close()
  pool.join()  # wait for parallel requests to complete
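
zip with itertools.repeat, as used above, is the broadcasting counterpart of product: repeat yields the same object indefinitely, and zip stops at the shortest iterable (here, the list of regions), so each worker call gets the shared client, session, and dryrun flag plus its own region. A standalone sketch with placeholder values (the worker below is made up; delete_everything_in_region itself is not reproduced here):

import itertools
from multiprocessing.dummy import Pool as ThreadPool

def wipe_region(client, session, region, dryrun):
    # one shared client/session/flag, one region per call
    return "%s: dryrun=%s" % (region, dryrun)

pool = ThreadPool(3)
results = pool.starmap(wipe_region,
                       zip(itertools.repeat("ec2-client"),
                           itertools.repeat("boto3-session"),
                           ["us-east-1", "eu-west-1", "ap-south-1"],
                           itertools.repeat(True)))
pool.close()
pool.join()
print(results)
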
Example #12
def _multiThreadedTest(infiles):
    arg1 = []
    arg2 = home
    for item in infiles:
        arg1.append(item)
    pool = ThreadPool(len(arg1))
    pool.starmap(_csvParse, zip(arg1, repeat(arg2)))
    print("Parsed through %d IP addresses." %
          (len(set(internal_ips + external_ips))))
    _blackList(hosts=set(internal_ips + external_ips))
    _geolocate(hosts)
Example #13
def store_raw_images(paths, links):
    global pic_num
    for link, path in zip(links, paths):
        if not os.path.exists(path):
            os.makedirs(path)
        image_urls = str(urllib.request.urlopen(link).read())
        
        pool = ThreadPool(32)
        pool.starmap(loadImage, zip(itertools.repeat(path),image_urls.split('\\n'),itertools.count(pic_num))) 
        pool.close() 
        pool.join()
Example #14
    def __init__(self, json_path, dataset_root, classes, limit):
        # load the json file path
        # initialize the default dict to hold the annotations
        self.dataset = json.load(open(json_path, 'r'))
        print('Parsing Annotation File')
        self.img_to_annotations = defaultdict(list)
        self.img_id_to_img = {}
        self.categories = {}
        self.dataset_annotations = os.path.join(dataset_root, 'annotations')
        self.dataset_images = os.path.join(dataset_root, 'images')
        # obtain the category ids from the dataset
        category_ids = [cat['id'] for cat in self.dataset['categories'] if cat['name'] in classes]

        # create individual counter for annotations of each classes
        self.classes_counter = {}
        for i in range(len(category_ids)):
            self.classes_counter[category_ids[i]] = 0

        for category in self.dataset['categories']:
            self.categories[category['id']] = category

        # loop through the annotations in dataset
        count = 0
        for annotation in self.dataset['annotations']:
            # filter classes through the annotations
            if annotation['category_id'] in category_ids:
                self.img_to_annotations[annotation['image_id']].append(annotation)
                self.classes_counter[annotation['category_id']] += 1
                
                if self.classes_counter[annotation['category_id']] >= limit:
                    category_ids.remove(annotation['category_id'])

        for image in self.dataset['images']:
            self.img_id_to_img[image['id']] = image

        # for k, v in self.img_to_annotations.items():
        #     if not os.path.isfile(os.path.join(self.dataset_images, str(k) + '.jpg')):
        #         print('Image file does not exist {}'.format(k))
        #     if not os.path.isfile(os.path.join(self.dataset_annotations, str(k) + '.xml')):
        #         print('Annotations file does not exist {}'.format(k))


        print('Finished Parsing Images, \n\tTotal Images -> {}'.format(len(self.img_to_annotations)))
        print('\n\tTotal Annotations:')
        for k, v in self.classes_counter.items():
            print('\n\t\t{} -> {}'.format(self.categories[k]['name'], v))
        input('Press any key to continue')
        print('Generating Pascal VOC XML Format Annotations')
        # initialize multiple threads for processing the parsing
        pool = ThreadPool(parallel_threads)
        pool.starmap(self.downloader, self.img_to_annotations.items())
        pool.close()
        pool.join()
Example #15
def filterMsgS(args,msgList):
    """
    filter out all dedicate msg entries into different dedicate files

    :param msgList: msg block defitions load from json file
    :return:
    """
    pool = ThreadPool(cpu_count())
    #  pool.starmap(filterItem, zip(itertools.repeat(args), msgList))
    pool.starmap(startPoint, zip(itertools.repeat(args), msgList))
    pool.close()
    pool.join()
Example #16
    def send(self):
        threadPool = ThreadPool(self.threadCount)
        for t in range(self.topicNum):
            self.driver.createTopics(self.topics[t], self.partitionNum, 1)
        sleep(5)

        args = []
        for t in range(self.topicNum):
            for p in range(self.partitionNum):
                args.append((t, p))
        threadPool.starmap(self.sendHelper, args)
        threadPool.close()
        threadPool.join()
Example #17
def home_downloader():
    lnglats = get_lnglat()
    if dir_name not in (os.listdir(os.curdir)):
        os.mkdir(dir_name)
    lnglats_arg = []
    for lnglat in lnglats:
        lat = "{0:0<8}".format((lnglat[0]).replace('.', ''))[:8]
        lng = "{0:0<9}".format((lnglat[1]).replace('.', ''))[:9]
        lnglats_arg.append((lat, lng))
    pool = ThreadPool(1)
    pool.starmap(download_home, lnglats_arg)
    pool.close()
    pool.join()
Example #18
def scan_urls(num_workers=8):
    p = "../data/event/"
    event_metadata = load_json(p + "event_metadata.json")
    pool = Pool(num_workers)
    for date_str in event_metadata:
        event_json = load_json(p + date_str + ".json")
        url_list = []
        for cam_id in event_json:
            for view_id in event_json[cam_id]["url"]:
                url_list += event_json[cam_id]["url"][view_id]["url"]
        pool.starmap(url_open_worker, url_list)
    pool.close()
    pool.join()
Example #19
    def cut(self, X, Y, index, depth):
        features = self.data.features
        best_edges = zeros(len(features))
        best_cuts = zeros(len(features))
        stump_cuts = pd.DataFrame(ones(X.shape, dtype=int), columns=features, index=X.index)
        stump_pool = ThreadPool(8)
        stump_input = []
        for i, feature in enumerate(features):
            stump_input.append((X[feature], Y))
        #stump_results = stump_pool.starmap(self.stump, stump_input)
        #stump_pool.close()
        #stump_pool.join()
        #for i, res in enumerate(stump_results):
        #    best_cuts[i], stump_cuts[features[i]], best_edges[i] = res

        for i, feature in enumerate(features):
            if i == 3 and depth == 2: print_stump = True
            else: print_stump = False
            best_cuts[i], stump_cuts[features[i]], best_edges[i] = self.stump(X[feature], Y, print=print_stump)

        idx = argmax(best_edges)
        # if index > 3 and index < 6:
        #     print(idx, features[idx], best_edges)
        counts = freq_count(stump_cuts[features[idx]])
        # just to get counts
        if self.method == 'gini':
            p = partitionGini(Y)
        else:
            p = partitionGini(Y)
        if -1 in counts:
            rule = Node('rule', index, p.N, depth=depth + 1, label=p.label, probability=p.probability, feature=features[idx], threshold=best_cuts[idx])
            self.tree[index] = rule
            print(rule)
            # print(X[stump_cuts[features[idx]]==-1][features[idx]], Y[stump_cuts[features[idx]]==-1])

            pool = ThreadPool(2)
            args = [(X[stump_cuts[features[idx]] == -1], Y[stump_cuts[features[idx]] == -1], rule.left, depth + 1),
                    (X[stump_cuts[features[idx]] == 1], Y[stump_cuts[features[idx]] == 1], rule.right, depth + 1)]
            pool.starmap(self.cut, args)
            pool.close()
            pool.join()
            # self.cut(X[stump_cuts[features[idx]]==-1], Y[stump_cuts[features[idx]]==-1], rule.left)
            # print(X[stump_cuts[features[idx]] == 1][features[idx]], Y[stump_cuts[features[idx]] == 1])
            # self.cut(X[stump_cuts[features[idx]] == 1], Y[stump_cuts[features[idx]] == 1], rule.right)
        else:
            # no cut took place
            leaf = Node('leaf', index, p.N, depth=depth + 1, label=p.label, probability=p.probability)
            self.leaf_samples += p.N.sum()/self.data.N
            print(leaf, self.leaf_samples)
            self.tree[index] = leaf
        return
Example #20
def main():
    global prid

    parser = argparse.ArgumentParser(
        "betterclone.py", description="copies a folder using service accounts")
    parser.add_argument("-k",
                        "--keyfile",
                        default="key.json",
                        help="keyfile filename")
    parser.add_argument("project", help="id of the project")
    parser.add_argument("source", help="id of the source folder")
    parser.add_argument("destination", help="id of the destination folder")
    args = parser.parse_args()

    print("auth main sa")
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        args.keyfile, [
            "https://www.googleapis.com/auth/iam",
            "https://www.googleapis.com/auth/drive"
        ])
    iam = googleapiclient.discovery.build("iam", "v1", credentials=credentials)
    drive = googleapiclient.discovery.build("drive",
                                            "v3",
                                            credentials=credentials)
    prid = args.project
    flist = resolve_folder(drive, args.source, args.destination)

    print("processing directories")
    while True:

        c = True

        for i in flist:
            if not i[2]:
                print("process " + i[0])
                flist += resolve_folder(drive, i[0], i[1])
                del flist[flist.index(i)]
                c = False

        if c:
            break

    print("start copy")
    pool = ThreadPool(98)
    pool.starmap(
        copy_dir,
        zip(itertools.repeat(credentials), [i[0] for i in flist],
            [i[1] for i in flist]))
    pool.close()
    pool.join()
Example #21
    def load_sources(self,
                     set_dir,
                     dataset="validation",
                     normalize="zscore",
                     store_raw=False):
        # Load sources in dataset with proper id
        # This happens once, upon calling dataset.prepare()
        self.dataset = dataset
        self.out_dir = set_dir
        # load specifications for image Dataset
        # follows load_shapes example
        black = (0, 0, 0)
        height = 512
        width = 512
        # add DES classes
        self.add_class("des", 1, "star")
        self.add_class("des", 2, "galaxy")

        # find number of sets:
        num_sets = 0
        for setdir in os.listdir(self.out_dir):
            if 'set_' in setdir:
                # add training image set
                self.add_image("des",
                               image_id=num_sets,
                               path=os.path.join(self.out_dir, setdir),
                               width=width,
                               height=height,
                               bg_color=black)
                num_sets += 1

        # store data in memory
        self.images = [None] * (num_sets)
        if store_raw:
            self.raws = [None] * (num_sets)

        self.masks = [None] * num_sets
        self.class_ids_mem = [None] * num_sets
        threads = np.clip(mp.cpu_count(), 1, num_sets)
        print("Loading images from disk.")
        pool = ThreadPool(threads)
        pool.starmap(self.load_image_disk,
                     [(i, normalize, store_raw) for i in range(num_sets)])
        if dataset == "training" or dataset == "validation":
            print("Loading masks from disk (this may take several minutes).")
            pool.map(self.load_mask_disk, range(num_sets))
        pool.close()
        pool.join()
        return
Example #22
 def __init_C_list(self):
     # somewhat empirical...
     if np.sum(self.__C_computed) <= (self.__n**2) / 4:
         for a in range(0, self.__n):
             C = zeros(self.__n, self.__n)
             for b in range(0, self.__n):
                 C = C + self.__P_list[b] * self.__vandermonde_inv[a, b]
             self.__C_list[a] = C
     else:
         pool = ThreadPool(4)
         for a in range(0, self.__n):
             for b in range(0, self.__n):
                 if self.__C_computed[a, b] == 0:
                     pool.starmap(self.__compute_C_ij,
                                  zip(np.where(self.__C_computed == 0)))
Example #23
 def exec(self, commands, **kwargs):
     """
     :param commands: the list of commands to execute for hosts or dict of list of commands indexed by host
     :return: the list of lines
     """
     from functools import partial
     commands_for_hosts = []
     output = []
     if isinstance(commands, list):
         for host in self.hosts:
             commands_for_hosts.append([host, commands])
     elif isinstance(commands, dict):
         for host in commands.keys():
             commands_for_hosts.append([host, commands[host]])
     else:
         for host in self.hosts:
             commands_for_hosts.append([host, [commands]])
     pool = ThreadPool(self.threads_num)
     raw_results = pool.starmap(partial(self.exec_on_host, **kwargs),
                                commands_for_hosts)
     results = {}
     for raw_result in raw_results:
         for host in raw_result.keys():
             results[host] = raw_result[host]
     pool.close()
     pool.join()
     return results
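
functools.partial in the exec() method above binds the keyword arguments once, so starmap only has to supply the positional [host, commands] pairs. A minimal standalone sketch of that combination (run_on_host and the host names are invented for illustration):

from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

def run_on_host(host, commands, timeout=10):
    # pretend to run the commands and report how many were run
    return {host: "ran %d command(s) with timeout=%ss" % (len(commands), timeout)}

pool = ThreadPool(2)
raw_results = pool.starmap(partial(run_on_host, timeout=30),
                           [("web1", ["uptime"]), ("web2", ["uptime", "df -h"])])
pool.close()
pool.join()
print(raw_results)
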
Example #24
def repSimilarity(allReps):
    keySet = list(allReps.keys())
    usedKeys = keySet[:50]
    repsToAnalyze = []
    for repName1 in usedKeys:
        for repName2 in usedKeys:
            if repName1 == repName2:
                continue
            repsToAnalyze.append([allReps, repName1, repName2])

    pool = ThreadPool(16)
    result = pool.starmap(binarySimilarity, repsToAnalyze)

    diffPartyResults = []
    for res in result:
        rep1Name, rep2Name, similarVotes, dissimilarVotes, numSameParty, rep1Party, rep2Party = res
        if numSameParty != 0:
            continue
        if similarVotes + dissimilarVotes == 0:
            continue
        ratioSame = similarVotes / (similarVotes + dissimilarVotes)
        diffPartyResults.append((ratioSame, similarVotes + dissimilarVotes,
                                 rep1Name, rep1Party, rep2Name, rep2Party))
    sortedDiffParty = sorted(diffPartyResults)
    for line in sortedDiffParty:
        print(line)
Example #25
def PESQ_evalpaths(reference_paths, degraded_paths, fs):
    """Compute the PESQ scores for all wavefiles in a list.

    Walks through a list of degraded wavefile paths and computes all the scores
    against the reference wavefile paths in the reference list.

    Parameters
    ----------
    reference_paths : list of str
        The paths to the reference wavfiles.
    degraded_paths : list of str
        The paths to the degraded wavfiles.
    fs : int
        The sample frequency; should be 8000 or 16000.

    Returns
    -------
    mos : 1-d float array
        The mean opinion score. Returns NAN if PESQ failed.
    mos_lqo : 1-d float array
        The mean opinion score rescaled with lqo. Returns NAN if PESQ failed.

    """
    PESQ_fs = partial(PESQ, fs=fs)

    p = Pool(THREADS)
    res = p.starmap(PESQ_fs, zip(reference_paths, degraded_paths))
    p.close()

    mos_list, mos_lqo_list = list(zip(*res))
    mos_list = np.array(mos_list, dtype=np.float64)
    mos_lqo_list = np.array(mos_lqo_list, dtype=np.float64)
    return mos_list, mos_lqo_list
Example #26
    def extractAllAndCompare(self):
        names = ["times of india", "the hindu", "guardian", "new york times", "google news", "cnn",
                 "reddit news", "reddit world news", "telegraph", "bbc"]

        outputfiles = ""

        import datetime, os
        today = str(datetime.date.today())
        directory = "./data/allFiles/" + today
        if not os.path.exists(directory):
            os.makedirs(directory)
        storageFile = directory + "/allValueFiles.txt"
        if not os.path.exists(storageFile):
            e = multiprocessing.Event()  # To synchronize progress bar
            queue = multiprocessing.Queue()  # To get score file from threaded process
            from multiprocessing.dummy import Pool as ThreadPool
            from itertools import repeat
            pool = ThreadPool(4)
            results = pool.starmap(extractorRunner.runScrapper, zip(names, repeat(e), repeat(queue)))
            pool.close()
            pool.join()

            for i in range(10):
                outputfiles += " " + queue.get()

            with open(storageFile, "w") as temp:
                temp.write(outputfiles)

        QApplication.processEvents()
        # Show comparison graph
        outputProcess = subprocess.Popen("python -m ui.comparingAll " + storageFile)
        outputProcess.wait()
        QApplication.processEvents()
Example #27
    def extract_bundle(self, request, replica):
        """
        Get the files and actual metadata.

        This is the main method that will extract the contents of the bundle
        and separate it into a tuple of (metadata_files, data_files), where
        the metadata_files are actual contents of the metadata files and the
        data_files are the metadata describing the files.

        :param request: The contents of the DSS event notification
        :param replica: The replica from which to pull the bundle
        """
        def get_metadata(file_name, _args):
            _metadata = {file_name: self.__get_file(*_args)}
            return _metadata

        bundle_uuid = request['match']['bundle_uuid']
        # Get the metadata and data descriptions
        metadata_files, data_files = self.__get_bundle(bundle_uuid, replica)
        # Create a ThreadPool which will execute the function
        pool = ThreadPool(len(metadata_files))
        # Pool the contents in the right format for the get_metadata function
        args = [(name, (_f['uuid'], replica))
                for name, _f in metadata_files.items()]
        results = pool.starmap(get_metadata, args)
        pool.close()
        pool.join()
        # Reassign the metadata files as a single dictionary
        metadata_files = dict(ChainMap(*results))
        return metadata_files, data_files
Example #28
    def _pool_query(self, query, func, attr, callback):
        """Uses :code:`query` to perform :code:`func` with kwargs :code:`attr`
        in parallel against all configured geocoders. Performs :code:`callback`
        function on the result list of addresses or locations.

        Args:
            query (str): The query component of a reverse or forward geocode.
            func (function): Function to use to obtain an answer.
            attr (dict): Keyword arguments to pass to function for each
                geocoder.
            callback (func): Function to run over iterable result.

        Returns:
            Output of `callback`.
        """
        pool = ThreadPool()
        results = pool.starmap(func,
                               zip([g.geocoder for g in self.geocoders],
                                   repeat(query),
                                   [getattr(g, attr) for g in self.geocoders]))
        pool.close()
        pool.join()
        locations = []
        for location in results:
            if isinstance(location, list):
                locations.extend(location)
            else:
                locations.append(location)
        # locations = [item for sublist in results for item in sublist]
        return callback(locations)
Example #29
def get_frames(df_img_url, dir_p="data/rgb/", num_try=0, num_workers=4):
    print("="*100)
    print("="*100)
    print("This function has been called for %d times." % num_try)
    if num_try > 30:
        print("Terminate the recursive call due to many errors. Please check manually.")
        return
    num_errors = 0
    arg_list = []
    # Construct the lists of urls and file paths
    for dt, df in df_img_url.groupby("date"):
        img_url_list = list(df["img_url"])
        dir_p_dt = dir_p + dt + "/"
        check_and_create_dir(dir_p) # need this line to set the permission
        check_and_create_dir(dir_p_dt)
        for i in range(len(img_url_list)):
            arg_list.append((img_url_list[i], dir_p_dt + str(i) + ".zip"))
    # Download the files in parallel
    pool = Pool(num_workers)
    result = pool.starmap(urlretrieve_worker, arg_list)
    pool.close()
    pool.join()
    for r in result:
        if r: num_errors += 1
    if num_errors > 0:
        print("="*60)
        print("Has %d errors. Need to do again." % num_errors)
        num_try += 1
        get_frames(df_img_url, dir_p=dir_p, num_try=num_try, num_workers=num_workers)
    else:
        print("DONE")
Example #30
def run(ipaddress, concurrency, mode, function, write, view):
    ip_list = ip_format(ipaddress)
    if mode == 'thread':
        pool = ThreadPool(concurrency)
    else:
        pool = ProcPool(concurrency)
    t1 = time.time()
    result_list = pool.map(scan, ip_list)
    t2 = time.time()
    available_ip_port = list(filter(None, result_list))
    if view:
        print('ping time: {}'.format(t2 - t1))
    if function == 'tcp':
        ip_port_iter = ((ip, port) for ip in available_ip_port
                        for port in range(PORT_RANGE[0], PORT_RANGE[-1] + 1))
        t3 = time.time()
        result_list = pool.starmap(scan, ip_port_iter)
        t4 = time.time()
        available_ip_port = defaultdict(list)
        for result in result_list:
            if isinstance(result, tuple):
                available_ip_port[result[0]].append(result[1])
        if view:
            print('tcp time: {}'.format(t4 - t3))
    pool.close()
    pool.join()
    print(available_ip_port)
    if write:
        with open(write, 'w+') as f:
            json.dump(available_ip_port, f)
Example #31
def compute_genome_gen_distances(genomes, gens, genome_gen_poses):
    """
    compute the edit distance between all genomes in `genomes` based on each gene in `gens`, with the help of `genome_gen_poses`
    :return: a dictionary containing a distance matrix for each gene
    """
    p = Pool(initializer=init_pool, initargs=(genome_gen_poses, genomes))
    # TODO: to actually compute the result, remove the [:0] slice below; otherwise the precomputed values will be used
    gen_edit_dists = p.starmap(
        genome_gen_distance,
        list(itertools.product(gens, range(len(genomes)),
                               range(len(genomes))))[:0])
    p.close()
    if gen_edit_dists:  # check if we computed new value or if it should use precomputed value
        print(gen_edit_dists)
    else:
        gen_edit_dists = compute_genome_gen_distances_pre_computed

    result = {}  # convert list like result to dict of matrixes
    for gen_edit_dist in gen_edit_dists:
        if gen_edit_dist is None:
            continue
        if gen_edit_dist[0] not in result:
            result[gen_edit_dist[0]] = np.full([len(genomes), len(genomes)], 0)
        result[gen_edit_dist[0]][gen_edit_dist[1]][
            gen_edit_dist[2]] = gen_edit_dist[3]
        result[gen_edit_dist[0]][gen_edit_dist[2]][
            gen_edit_dist[1]] = gen_edit_dist[3]
    return result
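
The initializer/initargs pair above is the usual way to hand large read-only data to every worker process once, instead of shipping it with each task. A minimal, self-contained sketch of that pattern (the worker, the gene name, and the toy distance are illustrative only):

import itertools
from multiprocessing import Pool

_shared_genomes = None

def init_pool(genomes):
    # runs once in each worker process; keeps the shared data in a module global
    global _shared_genomes
    _shared_genomes = genomes

def toy_distance(gene, i, j):
    # placeholder metric: difference of genome lengths
    return (gene, i, j, abs(len(_shared_genomes[i]) - len(_shared_genomes[j])))

if __name__ == "__main__":
    genomes = ["ACGT", "ACG", "ACGTT"]
    pool = Pool(initializer=init_pool, initargs=(genomes,))
    out = pool.starmap(toy_distance, itertools.product(["geneA"], range(3), range(3)))
    pool.close()
    pool.join()
    print(out)
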
Example #32
def docc(folder_name, nt, dt, finalcut, reftime, f2,f3, node):
    global fft_all, outpath
    pool = ThreadPool(node)
    outpath = join(folder_name,"%sto%s_COR" % (str(f2),str(f3)))
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    ns = len(fft_all)
    nts = (fft_all[0].stats.npts)
    lag = int(finalcut/dt)
    mid_pos = int(nts/2)
#   tcorr = np.arange(-nts + 1, nts)
#   dn = np.where(np.abs(tcorr) <= lag)[0]
    cor = fft_all[0].copy()
    cor.stats.delta = dt
    cor.stats.starttime = reftime
    sta_pair = []
    idx_lst = []
    for i in np.arange(ns-1):
        for j in np.arange(i+1,ns):
            if fft_all[i].stats.station == fft_all[j].stats.station:
                continue
            sta_pair.append("%s.%s_%s.%s" % 
                (fft_all[i].stats.station,fft_all[i].stats.channel,
                fft_all[j].stats.station,fft_all[j].stats.channel))
            idx_lst.append([i, j])
    t = time.perf_counter()
    results = pool.starmap(compute_cc, zip(idx_lst, repeat(nts), repeat(mid_pos), repeat(lag), repeat(cor)))
    print("%d station pairs using %d node(s):" % (len(sta_pair), node), (time.perf_counter() - t), "s")
    pool.close()
    pool.join()
    return sta_pair
Example #33
    async def scrawl(self, threads=5):

        logger.log('Scrawling Trackemon..', 'green')
        await self.client.wait_until_ready()

        # get arrays channels id need to post
        shout_out_channels = []
        for server in self.client.servers:
            for channel in server.channels:
                if channel.name in self.config.get('scrawl_channels', []):
                    shout_out_channels.append(discord.Object(channel.id))

        if len(shout_out_channels) == 0:
            raise Exception("No channel to shout out!")

        while not self.client.is_closed:
            logger.log('Scrawling Trackemon..', 'green')

            self._retrieve_session_id()

            # use multiprocessing
            if 'pokemons' in self.config.get('scrawl_trackemon'):
                pokemon_names = self.config.get('scrawl_trackemon')['pokemons']

                pool = ThreadPool(threads)
                messages = pool.starmap(self.scrawl_trackemon, zip(
                    pokemon_names, itertools.repeat(self.session_id)))

                for message in messages:
                    if len(message):
                        for channel in shout_out_channels:
                            await self.client.send_message(channel, message)

            # increase delay to finish task
            await asyncio.sleep(self.config.get('delay_scrawl', 300))
Example #34
def media_scraper(session, link, location, directory, post_count, username):
    print("Scraping " + location + ". Should take less than a minute.")
    pool = ThreadPool(max_threads)
    floor = math.floor(post_count / 100)
    if floor == 0:
        floor = 1
    a = list(range(floor))
    offset_array = []
    for b in a:
        b = b * 100
        offset_array.append(link.replace("offset=0", "offset=" + str(b)))
    media_set = pool.starmap(scrape_array, product(offset_array, [session]))
    media_set = [x for x in media_set if x is not None]
    media_set = list(chain.from_iterable(media_set))
    if "/users/" == directory:
        directory = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/users/onlyfans/"+username+"/"\
                    + location+"/"
    else:
        directory = directory + username + "/" + location + "/"

    print("DIRECTORY - " + directory)
    if not os.path.exists(directory):
        os.makedirs(directory)

    with open(directory + 'links.json', 'w') as outfile:
        json.dump(media_set, outfile)
    return [media_set, directory]
Example #35
def readAllSonars(TRIG, ECHO):
    from multiprocessing.dummy import Pool as ThreadPool
    pool = ThreadPool(len(ECHO))
    distances = pool.starmap(readSonar, zip(TRIG, ECHO))
    pool.close()
    pool.join()
    return distances[0], distances[1], distances[2], distances[3]
Example #36
def main(genre): 
	pool_of_threads = Pool(multiprocessing.cpu_count() - 1) # lets hope you have more than 1 cpu core...

	numbers = list(range(1,pages))

	old_results = pool_of_threads.starmap( get_painting_list, zip( numbers, itertools.repeat(genre)) ) 
	
	pool_of_threads.close()
	pool_of_threads.join()

	results = []

	for item in old_results:
		if item:
			for x in item:
				results.append(x)

	pool_of_threads = Pool(multiprocessing.cpu_count() - 1)
	pool_of_threads.starmap(downloader, zip(enumerate(results), itertools.repeat(genre) ) )
	pool_of_threads.close()
	pool_of_threads.join()
Example #37
 def topN_translates(self, bot, update, number):
     chat_id = update.message.chat_id
     
     bot.sendChatAction(chat_id, ChatAction.TYPING)
     
     languages = defaultdict(int)
     
     pool = ThreadPool(4)
     args = [('https://launchpad.net/{}/+translations'.format(project),
              languages) for project in self.projects]
     
     pool.starmap(self.check_project_untranslated, args)
     pool.close()
     
     pool.join()
     
     text = ['Language - Number of translated strings']
     top = sorted(languages.items(), key=lambda x: (x[1], x[0]),
                  reverse=True)[:number]
     for index, (lang, translated) in enumerate(top):
         text.append('{0}) {1} - {2}'.format(index + 1, lang, translated))
     
     bot.sendMessage(chat_id, text='\n'.join(text))
Example #38
def pool_filter(
        candidates: List[Tuple[str, str]], compare_images: Callable[[str, str, float, float], bool],
        aspect_fuzziness: float, rms_error: float, chunk_size: int
) -> List[Tuple[str, str]]:
    pool = Pool(None)
    return [
        c
        for c, keep in zip(
            candidates,
            pool.starmap(
                partial(compare_images, aspect_fuzziness=aspect_fuzziness, rms_error=rms_error),
                candidates, chunksize=chunk_size
            )
        )
        if keep
    ]
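
A hedged usage sketch for pool_filter follows: compare_by_ext is only a stand-in for the real compare_images callable (which is not shown in this snippet), the candidate pairs are invented, and pool_filter's own imports (Pool, partial) are assumed to be in place. It simply illustrates that each (path, path) tuple is unpacked by starmap and the pair is kept when the comparison returns True.

import os
from typing import List, Tuple

def compare_by_ext(file_1: str, file_2: str, aspect_fuzziness: float, rms_error: float) -> bool:
    # hypothetical comparison: treat files with the same extension as potential duplicates
    return os.path.splitext(file_1)[1] == os.path.splitext(file_2)[1]

candidate_pairs: List[Tuple[str, str]] = [("a.jpg", "b.jpg"), ("a.jpg", "c.png")]
duplicates = pool_filter(candidate_pairs, compare_by_ext,
                         aspect_fuzziness=0.05, rms_error=0.1, chunk_size=1)
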
Example #39
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exepath', help='path to the trace dump executable', type=file_type)
    parser.add_argument('files', nargs='*')
    parser.add_argument('-j', type=parallel_arg_type,
                        help='number of parallel processes', default=1)
    args = parser.parse_args(argv[1:])
    exepath = args.exepath

    files = []
    for wildcard in args.files:
        files.extend(glob.glob(wildcard))

    if len(files) == 0:
        print("No input files found!")
        return 1

    # Test the executable first.
    valid = False
    try:
        p = subprocess.Popen([
            exepath,
            '--log_file=stdout',
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()
        if p.returncode == 5:
            # code 5 = invalid trace file / trace file unspecified
            valid = True
    except OSError:
        pass

    if not valid:
        print("Trace executable invalid!")
        return 1

    print("Processing...")
    pool = Pool(args.j)

    start_time = time.perf_counter()
    results = pool.starmap(run_dumper, zip(itertools.repeat(exepath), files))
    pool.close()
    pool.join()

    elapsed_time = time.perf_counter() - start_time
    print("entire runtime took %.3f seconds" % elapsed_time)
    return 0
Example #40
def backup_all_servers():
    # Get a list of up and down servers
    hosts_up, hosts_down = check_alive_hosts(ssh_test_cmd)

    # Prepare the list for starmap two arguments.
    hosts_up_two_args = []

    # Add the argument to hosts list and append to separate list.
    # Output should be something like [('a', 1), ('b', 1), ('c', 1)]
    for i in zip(hosts_up, repeat(rsync_stdout)):
        hosts_up_two_args.append(i)

    # Make the Pool of workers
    pool = ThreadPool(number_of_threads)
    results = pool.starmap(rsync_start, hosts_up_two_args)
    pool.close()
    pool.join()
Example #41
def backup_one_server(server_ip):
    # This function need the parameter as a list
    # Prepare the list for starmap two arguments.
    hosts_up = []
    hosts_up_two_args = []

    hosts_up.append(server_ip)

    # Add the argument to hosts list and append to separate list.
    # Output should be something like [('a', 1), ('b', 1), ('c', 1)]
    # Always run backup one server in background ( append 0 )
    for i in zip(hosts_up, repeat(0)):
        hosts_up_two_args.append(i)

    # Make the Pool of workers
    pool = ThreadPool(number_of_threads)
    results = pool.starmap(rsync_start, hosts_up_two_args)
    pool.close()
    pool.join()
Example #42
def spider_kaiyuan(start,end,password):
    file_name = 'accounts_{0}-{1}.txt'.format(start, end)
    with open(file_name, 'a+') as f:
        for account in range(int(start), int(end)):
            if (auth_account(account=account,password=password)):
                f.write('account: {} password: {}'.format(account, password))
                f.flush()



def generate_func_args(start,end,password):
    return (start,end,password)

if __name__ == '__main__':

    pool = ThreadPool(5)
    args = []
    account_start = 888800000000
    password = input("输入测试密码 测试区间在{} -{}\n".format(account_start+5*10000,account_start+9*10000+9999))
    for i in range (5,9):
        args_map = generate_func_args(account_start+i*10000,account_start+i*10000+9999,password)
        args.append(args_map)

    pool.starmap(spider_kaiyuan, args)





Example #43
    return paramList

def processSpider(url, filePath, threadName):
    sTime = time.time()
    session = requests.session()
    session.headers['User-Agent'] = random.choice(UserAgent)
    response = session.get(url)
    # todo add message if response code != 200
    print(response.status_code)
    if response.status_code == 200:
        f = open(filePath, 'w+', encoding='utf-8')
        f.write(response.text)
        print(threadName + ": " + str(time.time() - sTime))
        f.close()

def getSession():
    session = requests.session()
    session.headers['User-Agent'] = random.choice(UserAgent)
    return session



# main()

if __name__ == '__main__':
    pool = ThreadPool(20)
    paramList = getParam()
    print(paramList)
    pool.starmap(processSpider, paramList)

Example #44

Rseq.print_line()
# Unzipping gzipped files and adding correct extension

if ext == 'gz':
    os.system('mkdir gzipped_reads')
    print('\nYour files are compressed. They will be decompressed.')
    if options.extension == 'gz': # checks whether commandline tool is used
        ext = options.ext_unzip
    else:  # or the interactive dialogue
        ext = input('Please specify the file extension of the decompressed file [fasta, fastq]: ')
    # Parallelized extraction and copying
    print('Extracting files')
    pool = ThreadPool(int(thread_no))
    pool.starmap(Rseq.gz_process, zip(files, itertools.repeat(ext)))
    pool.close()
    pool.join()

# Executing the FastQC algorithm
adap_set = adap_max
Rseq.print_line()
if exec_adapters in ['y', 'Y', 'yes']:
    # Analyzing the data with FastQC
    print('\nFastQC data analysis\n')
    Rseq.fqc([file_name + '.' + ext for file_name in fnames], thread_no)
    print('\nFastQC finished\n')

    # Generating the adapter list
    for fname in fnames:
Example #45
        query += ' -query ' + fasta_file
        query += ' -out ' + blast_output_file + ' -outfmt 6'
        os.system(query)
        with open(blast_output_file, 'r', encoding='utf-8') as f2:
            while True:
                line = f2.readline().split('\t')
                if float(line[3]) > 0.9 * len(seq):
                    ofile.write(name + '\t' + line[3] + '\t' + line[4])
                    ofile.write('\t' + line[5] + '\t' + line[6] + '\t' + line[7])
                    ofile.write('\t' + line[8] + '\t' + line[9] + '\t' + line[10])
                else:
                    break
        os.remove(fasta_file)
        os.remove(blast_output_file)
        return(out)
    else:
        out = -1
        return(out)


with open(completeGenome, 'r') as f:
    t = 0
    o = 'output.txt'
    while True:
        t = oneSequenceBlast(f, t, o)
        if t == -1:
            break

pool = Pool()
results = pool.starmap(oneSequenceBlast, mailsList)
Example #46
            os.system('scrapy crawl {0} -o {1}/{0}_items.json -t json'.format(
                spider_name, scraped_data_dir))
        else:
            raise FileNotFoundError(
                'Directory not found! Please check Scrapy project root and output directory paths '
                'passed in as arguments.')
    return


def arg_parser():
    parser = argparse.ArgumentParser(
        description='Script designed to run a series of spider scrapers to scrape data from '
                    'remote sites.')
    parser.add_argument(
        '-p', dest='scrapy_project', required=True, nargs='?', type=str,
        help='location to scrapy project root directory.')
    parser.add_argument(
        '-o', dest='output_dir', required=True, nargs='?', type=str,
        help='location to output directory.')
    return vars(parser.parse_args())


if __name__ == '__main__':
    params = arg_parser()
    output_dir = params['output_dir']
    project_dir = params['scrapy_project']
    pool = ThreadPool(len(SPIDERS))
    pool.starmap(
        crawl, zip(itertools.repeat(project_dir), itertools.repeat(output_dir), SPIDERS))
    pool.close()
Example #47
hardcordeMode = True  # Use this if you don't want words of length 2

# Setting working directory to path of current file
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Checking if thesaurus exists and is correctly formatted
# If not, create it
if hardcordeMode:
    print("You are in hardcore mode")
    if not checkThesaurus(formattedThesaurusPath, tSizeHardcore):
        createThesaurusHardcore(originalThesaurusPath, formattedThesaurusPath)
else:
    if not checkThesaurus(formattedThesaurusPath, tSize):
        createThesaurus(originalThesaurusPath, formattedThesaurusPath)

# Open the new thesaurus and store it in memory
f = open(formattedThesaurusPath, encoding="utf-8")
thesaurus = list(f)
f.close()

# Creates the list of arguments for parallel function
mailsList = []
for root, dirs, files in os.walk(mailsPath, topdown=False):
    for f in files:
        temp = [os.path.join(root, f), thesaurus, outputPath]
        mailsList.append(temp)

# Parallel baby
pool = ThreadPool()
results = pool.starmap(parallelTagging, mailsList)
Example #48
    eTime = time.time()
    logging.info(fileName + " takes time:" + str(eTime - sTime) + ", url:" + url)


def getDownloadParam(folder, suffix, list):
    paramList = []
    # for i in range(1, 1192):
    for i in list:
        index = str(i)
        url = mainUrl + index + endUrl
        fileName = 'file-' + index + '.' + suffix
        param = (url, folder, fileName)
        paramList.append(param)
    return paramList


if __name__ == '__main__':
    logging.basicConfig(filename=r"F:\Workspace\_data\log\resumeSpider\logging-11-27-2.log", level=logging.INFO)
    folder = "H:\\resume4"
    startTime = time.time()
    pool = ThreadPool(20)
    errorRange = getNumbersFromErrLog()
    # normalRange = range(1, 1200)
    paramList = getDownloadParam(folder, "zip", errorRange)
    pool.starmap(downloadResume, paramList)
    endTime = time.time()
    logging.info('take time:'+ str(endTime-startTime))


Example #49
class MySteamFriends(object):

    def __init__(self,
                 api_key: str,
                 steam_username: str = None,
                 steam_id: str = None,
                 debugging: bool = False,
                 concurrent_api: int = 4):
        """Initialises a connection to the Steam Web API and populates a list of friends.

        Args:
            api_key (str): API key from https://steamcommunity.com/dev/apikey
            steam_username (Optional[str]): steam username to base friends list from
            steam_id (Optional[str]): steam ID to base friends list from (alternative to steam_username)
            debugging (Optional[bool]): Enable debugging info.  Defaults to off
            concurrent_api (Optional[int]): How many concurrent Steam API subprocesses to run
        """

        if api_key == "":
            raise NameError("You don't have an api_key set!")
        if debugging:
            basicConfig(stream=sys.stdout, level=DEBUG)

        self.steam_api = WebAPI(key=api_key)

        if steam_id is None and steam_username is None:
            raise NameError("You don't call MySteamFriends with steam_username or steam_id (either required).")
        self.my_steam_id = steam_id
        if self.my_steam_id is None:
            self.my_steam_id = self.__get_my_steam_id(steam_username)

        self.friends_list = self.__get_my_friends_list()
        self.my_games_list = self.get_users_games(self.my_steam_id)
        self.total_gametime = self.get_my_total_playtime()

        self.api_pool = ThreadPool(concurrent_api)

        debug("api_key: %s, steam_ide: %s, my_steam_id: %s" % (api_key, self.my_steam_id, self.my_steam_id))

    def __get_my_steam_id(self, steam_user: str) -> str:
        try:
            return self.steam_api.ISteamUser.ResolveVanityURL(vanityurl=steam_user, url_type=1)['response']['steamid']
        except KeyError:
            raise NameError("That steam username doesn't exist!")


    def __get_my_friends_list(self) -> dict:
        friends = self.steam_api.ISteamUser.GetFriendList(steamid=self.my_steam_id)['friendslist']['friends']
        friends_list = [f['steamid'] for f in friends]
        friends_list.append(self.my_steam_id)
        return friends_list

    def __populate_my_friends_list(self):
        friends_list_detailed = self.api_pool.map(self.get_steam_user_dict, self.friends_list)
        return friends_list_detailed

    def _get_game_user_info_dict(self, uid: str, appid: str) -> dict:
        gameinfo = self.get_game_user_info(uid, appid)
        if gameinfo:
            return {uid: gameinfo}

    def get_my_total_playtime(self) -> int:
        total = 0
        for game in self.my_games_list:
            total += int(game['playtime_forever'])
        return total

    def get_game_name(self, appid: str) -> str:
        return [game['name'] for game in self.my_games_list if str(game['appid']) == appid][0]

    def get_steam_user(self, sid: str) -> dict:
        return self.steam_api.ISteamUser.GetPlayerSummaries(steamids=sid)['response']['players'][0]

    def get_steam_username(self, sid: str) -> str:
        return self.get_steam_user(sid)['personaname']

    def get_steam_user_dict(self, sid: str) -> dict:
        result = self.get_steam_user(sid)
        if result:
            return {sid: result}

    def get_users_games(self, sid: str) -> dict:
        result = self.steam_api.IPlayerService.GetOwnedGames(steamid=sid, include_played_free_games=1,
                                                           include_appinfo=1, appids_filter=0)['response']
        if 'games' in result:
            return result['games']

    def get_game_user_info(self, uid: str, appid: str) -> dict:
        games = self.get_users_games(uid)
        if games:
            return [game for game in games if str(game['appid']) == appid]

    def get_everyones_gamestats(self, appid: str) -> dict:
        # steam API is slow; uses threading to submit concurrent requests
        results = self.api_pool.starmap(self._get_game_user_info_dict, zip(self.friends_list, itertools.repeat(appid)))

        # transform result containing actual results, where key is sid
        result_dict = {}
        for result in list(filter(None.__ne__, results)):
            for key, value in result.items():
                result_dict[key] = value[0]

        return result_dict

    def get_game_stats_detailed(self, gamestats: dict) -> list:
        # steam API is slow; uses threading to submit concurrent requests
        return self.api_pool.map(self._combine_steam_user_game_stats, gamestats.items())

    def _combine_steam_user_game_stats(self, data: tuple) -> dict:
        for sid, game in [data]:
            return ({
                "steam_user": self.get_steam_user(sid),
                "game_stats": game
            })