Example #1
0
def configure_ensemble(zk_id_ip_pairs, dynamic_file, conf_dir, data_dir):
    '''Configures a Zookeeper ensemble with the given Zookeeper instances.
    After configuration, it starts the Zookeeper server.
    '''
    log.info('Doing a fresh Zookeeper ensemble configuration')
    log.info('Wiping out old state')
    _cmd_delete_old_state(data_dir)

    log.info('Resetting static configuration')
    _cmd_reset_config(dynamic_file, conf_dir)

    # Add hosts as participants to the ensemble configuration
    log.info('Resetting dynamic configuration')
    configs = []
    for zk_id, zk_ip in zk_id_ip_pairs:
        config = "server.{id}={ip}:2888:3888:participant;{port}".format(
            id=zk_id, ip=zk_ip, port=ZK_PORT)
        configs.append(config)

    ensemble_config = '\n'.join(configs)
    utils.save_to_file(dynamic_file, ensemble_config)
    start_zookeeper(conf_dir)
    log.info('Ensemble Configured.')
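Several of the examples here (for instance #1, #5, and #12) call utils.save_to_file(path, content) with the destination path first and a string second. The utils module itself is not part of this listing; a minimal sketch of a helper with that shape, offered purely as an assumption about what it might do, could look like this:

import logging
import os

log = logging.getLogger(__name__)


def save_to_file(path, content):
    """Write `content` to `path`, creating parent directories if needed.

    Hypothetical helper matching the (path, content) call order used above;
    the real utils.save_to_file in these projects may differ.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, 'w') as f:
        f.write(content)
    log.info('Saved %d characters to %s', len(content), path)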
Example #2
0
def step3_fill_lengths():
    """Retrieve the lengths of the pages via APIs"""
    cuisines = load_from_file('data/cuisines_langs.dat')

    # TODO: refactor grouping together pages, do only one request for every xyz.wikipedia.org
    params = {'action': 'query', 'prop': 'info', 'format': 'json'}
    skipped = []
    for kk, vv in tqdm(cuisines.items()):
        for lang_prefix, page in tqdm(vv['languages'].items()):
            if lang_prefix != 'en':
                wiki_url = page['wiki_url']
                api_url = f'https://{wiki_url}/w/api.php'
                params['titles'] = page['title']
                with requests.Session() as session:
                    post = session.post(api_url, params)
                    if post.ok:
                        res = post.json()
                    else:
                        print("Issue in POST call")
                        print(f"{api_url}\n{params}")
                        # skip this page: `res` would be stale or undefined
                        continue
                page_data = res['query']['pages'][next(
                    iter(res['query']['pages']))]
                if 'length' in page_data:
                    vv['languages'][lang_prefix]['length'] = page_data[
                        'length']
                else:
                    skipped.append((kk, lang_prefix))
    if skipped:
        for cuisine, lang in skipped:
            print(f"[Skip] {cuisine} in language {lang} (length unavailable)")
    save_to_file('data/cuisines_length.dat', cuisines)
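Examples #2, #3, #26, and #27 round-trip Python dictionaries through load_from_file and save_to_file on .dat files. Those helpers are not shown in the listing; one plausible sketch, assuming they simply pickle and unpickle the object, is:

import pickle


def save_to_file(path, data):
    # Hypothetical: serialise `data` to `path` with pickle.
    with open(path, 'wb') as f:
        pickle.dump(data, f)


def load_from_file(path):
    # Hypothetical counterpart: deserialise the object stored at `path`.
    with open(path, 'rb') as f:
        return pickle.load(f)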
Example #3
0
def step2_populate_other_languages():
    """Gets URLs and titles of cuisines in multiple languages"""
    cuisines_raw = load_from_file('data/cuisines_raw.dat')

    wiki_url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'prop': 'langlinks|info',
        'llprop': 'url',
        'lllimit': 'max',
        'format': 'json'
    }
    print("Getting links for every cuisine for every language...")
    for vv in tqdm(cuisines_raw.values()):
        pageid = vv['pageid']
        params['pageids'] = pageid
        with requests.Session() as session:
            post = session.post(wiki_url, params)
            res = post.json()
            res_info = res['query']['pages'][pageid]
        if 'langlinks' in res_info:
            vv['languages'] = {
                link['lang']: {
                    'title': link['*'],
                    'wiki_url': strip_url(link['url'])
                }
                for link in res_info['langlinks']
            }
            vv['languages']['en'] = {
                'length': res_info['length'],
                'title': res_info['title']
            }
    save_to_file('data/cuisines_langs.dat', cuisines_raw)
Example #4
0
    def update(self):
        if WORKERLISTGEN_SERVICE_URI and WORKERLISTGEN_CONTAINER_URI and API_AUTH:
            file_content = '# this file was auto-generated using workerlistgen\n'
            config_lines = InitHelper.get_config_lines(WorkerlistGen.linked_names)
            for config_line in config_lines:
                file_content += config_line
                file_content += '\n'

            save_to_file(CITUS_WORKERLIST_CONFIG_FILE, file_content)

            # find containers on this machine functioning as Citus master
            filters = dict()
            labels = []
            labels.append('com.docker.compose.project=' + CITUS_STACK_NAME)
            labels.append('com.docker.compose.service=' + CITUS_SERVICE_NAME)
            filters['label'] = labels

            citus_masters = docker.containers(filters=filters)

            for citus_master in citus_masters:
                logger.info("Sending container '%s' signal '%d'",
                            citus_master['Id'], signal.SIGHUP)
                docker.kill(citus_master['Id'], signal.SIGHUP)

            logger.info("===========END===========")
        else:
            raise RuntimeError('Docker Cloud environment variables not set')
Example #5
0
def reconfigure_ensemble(region, zookeeper_id, zookeeper_ip, running_ids,
                         ensemble_ip, dynamic_file, conf_dir, log_group):
    ''' Reconfigures the zookeeper ensemble by adding a new server to it. '''

    # Get and reset the static configuration
    # The static file changes the path of the dynamic file location.
    log.info('Resetting static configuration')
    _cmd_reset_config(dynamic_file, conf_dir)

    # Add host as an observer to the ensemble configuration
    log.info('Resetting dynamic configuration')
    config = _cmd_get_zookeeper_configuration(ensemble_ip)
    config += "\nserver.{id}={ip}:2888:3888:observer;{port}".format(
        id=zookeeper_id, ip=zookeeper_ip, port=ZK_PORT)
    utils.save_to_file(dynamic_file, config)
    start_zookeeper(conf_dir)

    # Wait a bit for Zookeeper to initialize itself
    # For some reason it crashes the moment we try to reconfigure it
    log.info('Sleeping for a bit')
    time.sleep(30)

    # Remove ids from the ensemble
    log.info('Reconfiguration by removing')
    remove_zookeeper_nodes(region, ensemble_ip, running_ids, log_group)

    # Add host as participant to the ensemble with "add" command
    log.info('Reconfiguration by adding')
    log.info('Adding id %s' % zookeeper_id)
    add_zookeeper_node(ensemble_ip, zookeeper_ip, zookeeper_id)
    log.info('Ensemble Reconfigured.')
Example #6
0
    def export(cls, rules):
        """
        Export rules to the provisional config file.

        `rules` are tuples (rule, score).
        """
        save_to_file("corpus/contextual_rules.pdg",
                     "\n".join(rule for rule, score in rules))
Example #7
0
 def export(cls, rules):
     """
     Rules are tuples (rule, score)
     """
     save_to_file("corpus/lexical_rules.pdg",
                  "\n".join("%s\t%f" % (rule, float(score))
                  for rule, score
                  in sorted(rules, key=itemgetter(1), reverse=True)))
Example #8
0
 def export(cls, rules):
     """
     Rules are tuples (rule, score)
     """
     save_to_file("corpus/lemmatizer_rules.pdg",
                  "\n".join("%s\t%f" % (rule, float(score))
                  for rule, score
                  in rules))
Example #9
0
 def auto_save(self):
     current_time = self._origo[PROP_TEXT_TIMER]
     if self._auto_save_time + INT_AUTO_SAVE_INTERVAL <= current_time:
         # Update last-time checked value
         self._auto_save_time = current_time
         # Save created mesh
         save_to_file(path=INT_AUTO_SAVE_FILE,
                      data=self._surface.serialise())
         print('[OKAY] file has been auto-saved to:', INT_AUTO_SAVE_FILE)
Example #10
0
def main():
    packages = remove_irrelevant_packages(get_top_packages(), TO_CHART)
    annotate_wheels(packages)
    wheel_types = ['manylinux1_py3', 'manylinux1_py2',
                   'win32_py3', 'win32_py2',
                   'win_amd64_py3', 'win_amd64_py2',
                   'macos_py3', 'macos_py2']
    packages = [p for p in packages if p['is_c_module'] or any([p[t] for t in wheel_types])]
    save_to_file(packages, 'results.json')
    generate_svg_wheel(packages, len(packages))
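Note that the wheel-charting examples (#10, #42, #44) pass the data first and the path second, and the target is a JSON file. A hedged sketch of a writer with that signature, assuming JSON serialisation (the project's actual helper is not shown here):

import json


def save_to_file(data, file_name):
    # Hypothetical: the (data, path) order used by the wheel-chart examples,
    # writing the object out as JSON.
    with open(file_name, 'w') as f:
        json.dump(data, f, indent=2)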
Example #11
0
def add_to_corpus(article_id):
    """
    Retrieve an article in db, clean it, and add it to corpus.
    """
    t = Article.objects.get(pk=article_id).content
    t = normalize_text(unescape_entities(t))
    t = t.encode("utf-8")
    t = t.decode("string_escape")
    save_to_file("corpus/%s.txt" % article_id, t)
    print normalize_text(t)
Example #12
0
def initialize(region, instance_id, id_file, log_group):
    ''' Initializes the zookeeper instance with a valid zookeeper id '''
    log.info('Initializing instance, instance_id=%s' % instance_id)
    zk_id = aws.get_tag(region, instance_id, ZK_ID_TAG)
    if not zk_id:
        zk_id = get_zookeeper_id(region, log_group)
        aws.set_tag(region, instance_id, ZK_ID_TAG, zk_id)
    utils.save_to_file(id_file, zk_id)
    log.info('Initialized with zookeeper_id=%s' % zk_id)
    return zk_id
Example #13
0
def generate_cache_json(data, wavs):
    cache = {}
    for row in data:
        value = (row['speed'], row['pitch'], row['voice'])
        for ai_output in row['ai_outputs']:
            key = ai_output['wav_filename']
            if key not in wavs:
                continue
            cache[key] = value

    utils.save_to_file(cache, settings.CACHE_JSON_PATH)
Example #14
0
def download_js(parsed_data, folder, base_url):
    # find all js
    links = [sc["src"] for sc in parsed_data.find_all("script", src=True)]
    for link in links:
        filename = re.search(r'/([^/]+)$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue

        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
Example #15
0
def download_media(parsed_data, folder, base_url):
    # find all jpg, png, gif, svg
    links = set(
        [link['href'] for link in parsed_data.findAll('link', href=True)] +
        [img['src'] for img in parsed_data.find_all('img', src=True)])
    for link in links:
        filename = re.search(r'/([\w_\-.]+[.](jpg|gif|png|jpeg|svg))$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue

        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
Example #16
0
def compare_sok_and_tf(args):
    sok_results = test_sok_multi_dense_emb(args)
    utils.save_to_file("./sok_results_" + str(args.task_id) + ".file",
                       sok_results)

    barrier = hvd.allreduce(tf.zeros([1]))

    # if args.task_id != 0:
    #    return

    tf_results = test_tf_multi_dense_emb(args)

    all_sok_results_list = list()
    for i in range(args.worker_num):
        sok_results = utils.restore_from_file("./sok_results_" + str(i) +
                                              ".file")
        sok_results = tf.concat(sok_results,
                                axis=0)  # [iter-num, replica-bs, vectors]
        all_sok_results_list.append(sok_results)
    all_sok_results_list = tf.concat(all_sok_results_list, axis=1)
    all_sok_results_list = tf.split(all_sok_results_list,
                                    num_or_size_splits=len(tf_results),
                                    axis=0)
    all_sok_results_list = [tf.squeeze(item) for item in all_sok_results_list]

    if len(all_sok_results_list) != len(tf_results):
        raise ValueError(
            "The length of sok results is not equal to that of tensorflow.")

    if args.dynamic_input == 1:
        atol = 1e0
        rtol = 1e-2
    elif args.mixed_precision:
        atol = 1e-2
        rtol = 1e-2
    else:
        atol = 1e-4
        rtol = 1e-4
    for i, sok_vector in enumerate(all_sok_results_list):
        tf.debugging.assert_near(
            tf.reshape(sok_vector, shape=[-1, tf.shape(sok_vector)[-1]]),
            tf_results[i],
            atol=atol,
            rtol=rtol,
            message=("the values are not consistent at iteration: %d" % i))

    print("\n[INFO]: For multiple dense embedding layer: with Horovod, the embedding"+\
          " vectors obtained from SOK and TF are consistent for %d iterations."
          " With mixed_precision = %s"
          %(len(sok_results), args.mixed_precision))
Example #17
0
def test_tf_dense_model(args, init_tensors, *random_samples):
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=args.global_batch_size,
                               to_sparse_tensor=False,
                               repeat=1)

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    tf_dense_demo = TfDenseDemo(init_tensors, args.global_batch_size,
                                args.slot_num, args.nnz_per_slot,
                                args.embedding_vec_size)

    optimizer = utils.get_dense_optimizer(args.optimizer)(learning_rate=0.1)
    if args.mixed_precision:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
            optimizer, initial_scale=1024)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, embedding_vector = tf_dense_demo(inputs, training=True)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
        grads = tape.gradient(_loss, tf_dense_demo.trainable_variables)
        if args.mixed_precision:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(zip(grads,
                                      tf_dense_demo.trainable_variables))
        return loss, embedding_vector

    tf_results = list()

    for i, (input_tensors, labels) in enumerate(dataset):
        print("-" * 30, str(i), "-" * 30)
        loss, embedding_vector = _train_step(input_tensors, labels)
        print("[INFO]: iteration {}, loss {}".format(i, loss))
        tf_results.append(embedding_vector.numpy())

    if not hasattr(args, "task_id"):
        args.task_id = 0
    if 1 == args.save_params and args.task_id == 0:
        filepath = r"./embedding_variables/"
        utils.save_to_file(os.path.join(filepath, r"tf_variable.file"),
                           tf_dense_demo.params.numpy())

    return tf_results
Example #18
0
def download_css(parsed_data, folder, base_url):
    # find all css
    links = [
        link['href']
        for link in parsed_data.findAll('link', href=True, rel="stylesheet")
    ]
    for link in links:
        filename = re.search(r'/([^/]+)$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue

        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
Example #19
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, help='Input file')
    parser.add_argument('--output', required=True, help='Output file')
    args = parser.parse_args()

    logging.basicConfig(filename="1-4.log", level=logging.INFO)

    need_args = ('matrix', 'eps')
    init_dict = read_data(args.input, need_args)
    A, eps = init_dict['matrix'], init_dict['eps']

    values, vectors = jacobi_eigenvalue(A, eps)
    numpy_eigs(A, values, vectors)
    save_to_file(args.output, eigenvalues=values, eigenvectors=vectors)
Example #20
0
 def _update_haproxy(self, cfg):
     if self.link_mode in ["cloud", "new"]:
         if Haproxy.cls_cfg != cfg:
             logger.info("HAProxy configuration:\n%s" % cfg)
             Haproxy.cls_cfg = cfg
             if save_to_file(HAPROXY_CONFIG_FILE, cfg):
                 Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         elif self.ssl_updated:
             logger.info("SSL certificates have been changed")
             Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         else:
             logger.info("HAProxy configuration remains unchanged")
         logger.info("===========END===========")
     elif self.link_mode in ["legacy"]:
         logger.info("HAProxy configuration:\n%s" % cfg)
         if save_to_file(HAPROXY_CONFIG_FILE, cfg):
             UpdateHelper.run_once()
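The HAProxy examples (#20, #24, #25, #30, #33) trigger a reload only when save_to_file(HAPROXY_CONFIG_FILE, cfg) returns a truthy value, which suggests a variant that reports whether the write succeeded. A speculative sketch of that behaviour, not the project's actual implementation:

import logging

logger = logging.getLogger("haproxy")


def save_to_file(path, content):
    # Hypothetical: write `content` to `path` and report success, so callers
    # can decide whether to reload HAProxy. The real helper may also check
    # whether the content actually changed.
    try:
        with open(path, 'w') as f:
            f.write(content)
        return True
    except IOError as e:
        logger.error("Cannot write to %s: %s", path, e)
        return False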
Example #21
0
def main():
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=1
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=2
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=3
    # Collect every answer as a dict in one list, for easy insertion into MongoDB
    anser_list = []
    for i in range(1, 11):
        url = 'https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page={}'.format(
            i)
        ansers = models_from_url(url)
        # Merge the lists
        anser_list += ansers
    log('anser_list', type(anser_list), len(anser_list))
    # Two ways to store the data: MongoDB, or a data file (JSON format)
    # insert_many(db, 'zhihuAnser', anser_list)
    # Write to a file
    save_to_file(anser_list, 'zhihuAnser.txt')
Example #22
0
def save_xml():
    'does not work with archive'
    texts = [cls.get_xml() for cls in classes]
    [
        save_to_file(cls, text, format='xml')
        for cls, text in zip(classes, texts)
    ]
    return [['Saved to XML', [('green', 'ok')]]]
Example #23
0
    def parse_svg_file(self, file_path, class_):
        print("Parsing %s ..." % file_path)

        if class_ in cache.class_text.keys():
            return

        if os.path.exists('./idx/svg_text/' + class_ + '.txt'):
            # Load the cached text
            with open('./idx/svg_text/' + class_ + '.txt', 'r') as f:
                text = f.read()
            cache.class_text[class_] = text.split('\n')
            return

        root = ET.parse(file_path).getroot()
        # root.tag = {http://www.w3.org/2000/svg}svg
        namespace = re.findall(r'\{.*\}', root.tag)[0]
        # print(namespace)

        # # get ids
        # pos_y = []
        # for item in root.findall('.//%spath' % namespace):
        #     #print(item.get('d').split()[1])
        #     # M0 31 H600
        #     pos_y.append(item.get('d').split()[1])
        #
        # if not pos_y:
        #     for item in root.findall('.//%stext' % namespace):
        #         pos_y.append(item.get('y'))

        # get content
        contents = ""
        for item in root.findall('.//%stextPath' % namespace):
            #print(item.text)
            contents = contents + item.text + "\n"

        if not contents:
            for item in root.findall('.//%stext' % namespace):
                # print(item.text)
                contents = contents + item.text + "\n"

        # assert (len(pos_y) == len(contents))

        # for pos, content in zip(pos_y, contents):
        #     print(pos, content)

        utils.save_to_file('./idx/svg_text/', class_ + '.txt', contents)
        cache.class_text[class_] = contents.split('\n')
Example #24
0
 def _update_haproxy(self, cfg):
     if HAPROXY_SERVICE_URI and HAPROXY_CONTAINER_URI and API_AUTH:
         if Haproxy.cls_cfg != cfg:
             logger.info("HAProxy configuration:\n%s" % cfg)
             Haproxy.cls_cfg = cfg
             if save_to_file(HAPROXY_CONFIG_FILE, cfg):
                 Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         elif self.ssl_updated:
             logger.info("SSL certificates have been changed")
             Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         else:
             logger.info("HAProxy configuration remains unchanged")
         logger.info("===========END===========")
     else:
         logger.info("HAProxy configuration:\n%s" % cfg)
         save_to_file(HAPROXY_CONFIG_FILE, cfg)
         UpdateHelper.run_once()
Example #25
0
 def _update_haproxy(self, cfg):
     if self.link_mode in ["cloud", "new"]:
         if Haproxy.cls_cfg != cfg:
             logger.info("HAProxy configuration:\n%s" % cfg)
             Haproxy.cls_cfg = cfg
             if save_to_file(HAPROXY_CONFIG_FILE, cfg):
                 Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         elif self.ssl_updated:
             logger.info("SSL certificates have been changed")
             Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
         else:
             logger.info("HAProxy configuration remains unchanged")
         logger.info("===========END===========")
     elif self.link_mode in ["legacy"]:
         logger.info("HAProxy configuration:\n%s" % cfg)
         if save_to_file(HAPROXY_CONFIG_FILE, cfg):
             UpdateHelper.run_once()
Example #26
0
def get_wikimedia_languages_list():
    """Download and create a correlation dict from language prefixes to long language names"""
    wiki_languages = {}
    req = requests.get(
        'https://meta.wikimedia.org/wiki/Table_of_Wikimedia_projects')
    soup = BeautifulSoup(req.text, features='html.parser')
    table = soup.find_all('table', class_='sortable')[0]
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            continue
        code, english_name, local_name = [td.text.strip() for td in tds[:3]]
        code = code.replace(':', '')
        wiki_languages[code] = {
            'eng_name': english_name,
            'local_name': local_name
        }
    save_to_file('data/wiki_languages.dat', wiki_languages)
Example #27
0
def step1_prepare_cuisines_data():
    """Create a data structure starting from the cuisines list in Template:Cuisines"""
    req = requests.get('https://en.wikipedia.org/wiki/Template:Cuisines')
    soup = BeautifulSoup(req.text, features='html.parser')
    html_cuisines = soup.find(title='National dish').find_next('ul')
    cuisines_titles = []
    skipped = []
    for ch in html_cuisines:
        if not isinstance(ch, str):
            if len(ch.find_all('a')) > 1:
                # If it has sub-cuisines (regional ones) consider only the first
                cuisine = ch.find_all('a')[0]
            else:
                # If it's only a national cuisine
                cuisine = ch.find('a')
            # If it's not a redirect to a different page (e.g.: "cuisine" section in the country page)
            if not cuisine.get('class'):
                title, href = cuisine.get('title'), cuisine.get('href')
                cuisines_titles.append(
                    (title, unquote(href.replace('/wiki/', ''))))
            elif 'mw-redirect' in cuisine.get('class'):
                skipped.append(cuisine.get('title'))
            else:
                raise ValueError(f"Undefined case: {cuisine}")
    if skipped:
        for skip in skipped:
            print(f"[Skip] {skip} (redirect)")

    api_url = 'https://en.wikipedia.org/w/api.php'
    params = {'action': 'query', 'format': 'json'}
    cuisines_raw = {}
    for chunk in split_to_chunks(cuisines_titles, 50):
        params['titles'] = '|'.join(c[1] for c in chunk)
        with requests.Session() as session:
            post = session.post(api_url, params)
            res = post.json()
        for vv in res['query']['pages'].values():
            cuisines_raw[vv['title']] = {
                'pageid': str(vv['pageid']),
                'languages': {}
            }
    save_to_file('data/cuisines_raw.dat', cuisines_raw)
Example #28
0
def parse_file(path):
    """It parses the corpus file into a list of lists,
    used for experimental purposes instead of parsing the
    whole corpus every time.

    Arguments:
        path {[str]} -- [path to corpus output]

    Returns:
        [list of lists] -- [sentences of the corpus]
    """
    sentences = []
    with open(path, encoding='utf-8', mode='r') as f:
        lines = f.readlines()
        for line in tqdm(lines,
                         desc=f'Fetching {path[path.rfind("/") + 1:]} output'):
            line_ = ast.literal_eval(line)
            sentences.append(line_)
    save_to_file(sentences, path)
    return sentences
Example #29
0
    def test_save_and_load(self):
        team_members = main.get_team_members()
        team_data = {'date': team_members}
        # save data to file
        data_file_name = '../data/test/web_data_json_test.data'

        utils.save_to_file(data_file_name, team_data)
        #
        # test loading from the file and if the name of Johanna can be found
        #
        member_data = utils.load_from_file(data_file_name)
        a_day_data = {}
        # take the first item in the dictionary; doesn't matter which one it is
        for key in member_data:
            a_day_data = member_data[key]
            break
        found_Johanna = False
        for d in a_day_data:
            if d['name'] == 'Johanna Nicoletta':
                found_Johanna = True
        self.assertEqual(found_Johanna, True, "Cannot save or load from file")
Example #30
0
 def _update_haproxy(self, cfg):
     if Haproxy.cls_cfg != cfg:
         logger.info("HAProxy configuration:\n%s" % cfg)
         Haproxy.cls_cfg = cfg
         if save_to_file(HAPROXY_CONFIG_FILE, cfg):
             UpdateHelper.run_reload()
     elif self.ssl_updated:
         logger.info("SSL certificates have been changed")
         UpdateHelper.run_reload()
     else:
         logger.info("HAProxy configuration remains unchanged")
     logger.info("===========END===========")
Example #31
0
    def manipulate_homophily(self, strategy_func, strategy_name, pick_strategy,
                             manipulation_clas, network_name):
        self.global_homophilies = []
        class_partitions = []
        nodes_with_manipulation_clas = [
            node for node in self.G.nodes()
            if self.get_node_class(node) == manipulation_clas
        ]
        class_partitions.append(len(nodes_with_manipulation_clas) / self.size)
        homo_list_before = self.local_homophily()
        nodes_to_remove = [
            node for node in self.G.nodes()
            if self.get_node_class(node) != manipulation_clas
        ]
        utils.save_to_file(homo_list_before, network_name,
                           '{0}_homo_list_before'.format(strategy_name))
        ''' add, remove or change node '''
        strategy_func(nodes_to_remove, nodes_with_manipulation_clas,
                      class_partitions, pick_strategy, manipulation_clas)

        homo_list_after = self.local_homophily()
        utils.save_to_file(homo_list_after, network_name,
                           '{0}_homo_list_after'.format(strategy_name))
        utils.save_to_file(self.global_homophilies, network_name,
                           '{0}_global_homophilies'.format(strategy_name))
        utils.plot_local_homophily(homo_list_before, homo_list_after,
                                   network_name, strategy_name)
        utils.plot_global_homophily(self.global_homophilies, network_name,
                                    strategy_name)
        utils.plot_all(class_partitions, self.global_homophilies,
                       self.homophily_per_clas, manipulation_clas,
                       network_name, strategy_name)
Example #32
0
def main():
    # 'https: // movie.douban.com/top250'
    # The URL copied straight from Douban is broken: invalidURL, no host supplied
    '''
    URLs of the first three pages:
    https://movie.douban.com/top250
    https://movie.douban.com/top250?start=25&filter=
    https://movie.douban.com/top250?start=50&filter=
    '''
    # Collect every movie as a dict in one list, for easy insertion into MongoDB
    movie_list = []
    for i in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={}'.format(i)
        movies = models_from_url(url)
        # Merge the lists
        movie_list += movies
        # download_img('doubanTop250', movies)
    log('movie_list', type(movie_list), len(movie_list))
    # Two ways to store the data: MongoDB, or a data file (JSON format)
    insert_many(db, 'doubanTop250', movie_list)
    # Write to a file
    save_to_file(movie_list, 'doubanTop250.txt')
Example #33
0
 def _update_haproxy(self, cfg):
     if Haproxy.cls_cfg != cfg:
         logger.info("HAProxy configuration has changed.")
         # Logging the config file may be helpful in the future but is creating too many logs for now.
         # logger.info("HAProxy configuration:\n%s" % cfg)
         Haproxy.cls_cfg = cfg
         if save_to_file(HAPROXY_CONFIG_FILE, cfg):
             Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
     elif self.ssl_updated:
         logger.info("SSL certificates have been changed")
         Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
     else:
         logger.info("HAProxy configuration remains unchanged")
     logger.info("===========END===========")
Example #34
0
def save_all():
    [set_header(x)  for x in classes if not x.load_from_first]

    texts = [cls.get_data() for cls in classes]

    diffs = [get_diff(cls, text) for cls, text in zip(classes, texts)]

    if settings.MOD_SOURCE:
        [save_to_file(cls, text) for cls, text in zip(classes, texts)]
    else:
        zf = ZipFile(settings.PATH_TO_MOD)
        names = set(zf.namelist()) - set([x.FILE_PATH for x in classes])
        data = [(path, zf.read(path)) for path in names]
        zf.close()

        zf = ZipFile(settings.PATH_TO_MOD, 'w')
        [save_to_zip(cls, zf, text) for cls, text in zip(classes, texts)]
        [zf.writestr(file, text) for file, text in data]
        zf.close()
    return diffs
Example #35
0
  item = {
    'ime': values[0],
    'ram': int( values[1] ),
    'ocjena': float( values[2] ),
    'cijena': float( values[3] )
  }
  ramovi.append(item)


fp.close()
fp = open('Baza podataka/procesori.txt')
lines = fp.readlines()
for line in lines:
  line = line.strip()
  values = line.split('|')
  if len(values) != 4: break
  item = {
    'ime': values[0],
    'cpu': int( values[1] ),
    'ocjena': float( values[2] ),
    'cijena': float( values[3] )
  }
  procesori.append(item)

utils.save_to_file('data/igrice.txt', games)
utils.save_to_file('data/ramovi.txt', ramovi)
utils.save_to_file('data/graficke.txt', graficke)
utils.save_to_file('data/procesori.txt', procesori)


Example #36
0
def process_exp2_file(filename, **kwargs):
    print 'Processing exp2 file: %s' % filename
    # function scope constants
    cbr_rate = kwargs.pop('cbr_rate')
    agent_names = (kwargs.pop('agent1_name'), kwargs.pop('agent2_name'))
    tcp_fids = ('2', '3')
    tcp_src_nodes = ('0', '4')
    tcp_sink_nodes = ('3', '5')

    # function scope refs
    pkt_tcp_uids = [set(), set()]
    pkt_tcp = [0, 0]
    drop_rates = [0.0, 0.0]
    latencies = [0.0, 0.0]
    pkt_drops = [0, 0]
    throughputs = [0, 0]
    rtts = [{}, {}]
    cells = None
    i = 0
    with open(filename, 'r') as f:
        for line in f:
            cells = line.split()
            try:
                i = tcp_fids.index(cells[7])
            except ValueError:
                continue

            # now switched to corresponding var space
            pkt_tcp_uids[i].add(cells[11])
            if cells[0] == evt_drop:
                pkt_drops[i] += 1
                continue
            elif cells[0] == evt_enque:
                if cells[2] == tcp_src_nodes[i] and cells[4] == 'tcp':
                    if cells[10] not in rtts[i]:
                        rtts[i][cells[10]] = [float(cells[1]), None]
                continue
            elif cells[0] == evt_recv:
                if cells[3] == tcp_sink_nodes[i]:
                    throughputs[i] += int(cells[5])
                elif cells[3] == tcp_src_nodes[i] and cells[4] == 'ack':
                    if cells[10] in rtts[i]:
                        rtts[i][cells[10]][1] = float(cells[1])
                        throughputs[i] += int(cells[5])
                continue

    # save to file
    for i in (0, 1):
        throughputs[i] = float(throughputs[i]) / (1024.0 * 1024.0)
        pkt_tcp[i] = len(pkt_tcp_uids[i])
        drop_rates[i] = float(pkt_drops[i]) / float(pkt_tcp[i])
        latencies[i] = avg_rtts(rtts[i])
        print 'From pkt drop field: %d packets were dropped' % pkt_drops[i]

    cust_name = '_'.join(agent_names)
    save_to_file(format_data_file_name(filename, cust_name, 'THP'),
                 delim, *[cbr_rate, throughputs[0], throughputs[1]])
    save_to_file(format_data_file_name(filename, cust_name, 'DR'),
                 delim, *[cbr_rate, drop_rates[0], drop_rates[1]])
    save_to_file(format_data_file_name(filename, cust_name, 'LT'),
                 delim, *[cbr_rate, latencies[0], latencies[1]])
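Example #36 above and #38 below use yet another shape: save_to_file(filename, delim, *values), with comments indicating that rows are appended for later plotting. A hedged sketch of an append-style writer matching that call pattern (an assumption, not the scripts' actual helper):

def save_to_file(file_name, delim, *values):
    # Hypothetical: append one delimiter-separated row of values to the file,
    # matching the (filename, delim, *values) calls in the ns-2 trace parsers.
    with open(file_name, 'a') as f:
        f.write(delim.join(str(v) for v in values) + '\n')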
Example #37
0
 def save(self):
     # Save created mesh
     file_path = INT_TEMP_SAVE_FILE.format(datetime.now())
     save_to_file(path=file_path, data=self._surface.serialise())
     print('[OKAY] file has been saved to:', file_path)
Example #38
0
def process_exp1_file(filename, **kwargs):
    print 'Processing exp1 file: %s' % filename
    # function scope constants
    cbr_rate = kwargs.pop('cbr_rate')
    agent_name = kwargs.pop('agent_name')
    tcp_fid = '2'
    tcp_src_node = '0'
    tcp_sink_node = '3'

    # function scope refs
    pkt_tcp_uids = set()
    pkt_drops = 0
    throughput = 0
    rtts = {}
    cells = None
    with open(filename, 'r') as f:
        for line in f:
            cells = line.split()
            # grab tcp events according to the flow id
            if cells[7] == tcp_fid:
                # adds the packet unique id to the set
                pkt_tcp_uids.add(cells[11])
                # record tcp drop event, no need to proceed
                if cells[0] == evt_drop:
                    pkt_drops += 1
                    continue
                # a tcp packet enque (send) event
                elif cells[0] == evt_enque:
                    # if it is a tcp packet sent from tcp src node
                    if cells[2] == tcp_src_node and cells[4] == 'tcp':
                        # record the start time of rtt: (start, end)
                        # if the seq num not exists in the dict
                        if cells[10] not in rtts:
                            rtts[cells[10]] = [float(cells[1]), None]
                    continue
                # a tcp packet recv event
                elif cells[0] == evt_recv:
                    # if it is a tcp packet recved by tcp sink node
                    if cells[3] == tcp_sink_node:
                        # record packet size to throughput
                        throughput += int(cells[5])
                    # a tcp src node recv (ack) event
                    elif cells[3] == tcp_src_node and cells[4] == 'ack':
                        if cells[10] in rtts:
                            # update rtt end time
                            rtts[cells[10]][1] = float(cells[1])
                            # record ack packet size to throughput
                            throughput += int(cells[5])
                    continue

    # convert throughput, in MB
    throughput = float(throughput) / (1024.0 * 1024.0)
    # calculate drop rate, in digits
    pkt_tcp = len(pkt_tcp_uids)
    drop_rate = float(pkt_drops) / float(pkt_tcp)
    # calculate latency, in seconds
    latency = avg_rtts(rtts)
    print 'From pkt drop field: %d packets were dropped' % pkt_drops
    # append to file
    save_to_file(format_data_file_name(filename, agent_name, 'THP'),
                 delim, *[cbr_rate, throughput])
    save_to_file(format_data_file_name(filename, agent_name, 'DR'),
                 delim, *[cbr_rate, drop_rate])
    save_to_file(format_data_file_name(filename, agent_name, 'LT'),
                 delim, *[cbr_rate, latency])
Example #39
0
def save_xml():
    'does not work with archive'
    texts = [cls.get_xml() for cls in classes]
    [save_to_file(cls, text, format='xml') for cls, text in zip(classes, texts)]
    return [['Saved to XML', [('green', 'ok')]]]
Example #40
0
def create_pid_file():
    pid = str(os.getpid())
    save_to_file(PID_FILE, pid)
    return pid
Example #41
0
    def make(self, force=False):
        """
        Build the lexicon.
        """
        final = {}
        lemme_to_original = {}
        C = Corpus(self.CORPUS_EXT)
        for tk in C.tokens:
            # Don't take Proper nouns (SBP) in lexicon
            if tk.verified_tag[:3] == "SBP":
                continue
            # Manage tag frequencies
            if not tk.original in final:
                final[tk.original] = defaultdict(int)
            final[tk.original][tk.verified_tag] += 1
            # Manage lemme frequencies
            if not tk.original in lemme_to_original:
                lemme_to_original[tk.original] = {}
            if not tk.verified_tag in lemme_to_original[tk.original]:
                lemme_to_original[tk.original][tk.verified_tag] = defaultdict(int)
            # Frequency of this lemme for this tag for this word...
            lemme_to_original[tk.original][tk.verified_tag][tk.verified_lemme] += 1

        def get_one_line(key):
            """
            Return one line of the lexicon.
            Takes the token.original string as a parameter.
            """
            return u"%s\t%s" % (key, get_tags(key))

        def get_tags(key):
            """
            Return sorted tags for an original word, compiled in a string:
            tag/lemme tag/lemme
            """
            # Retrieve tags
            tags = sorted([(k, v) for k, v in final[key].iteritems()],
                                             key=itemgetter(1), reverse=True)
            # Build final data
            final_data = []
            for tag, score in tags:
                computed_lemmes = get_lemmes(key, tag)
                lemme, score = computed_lemmes[0]
                final_data.append(u"%s/%s" % (tag, lemme))

            # Return it as a string
            return u" ".join(final_data)

        def get_lemmes(key, tag):
            """
            Return sorted lemmes for one word with one POS tag.
            """
            return sorted(((k, v) for k, v in lemme_to_original[key][tag].iteritems()),
                                                key=itemgetter(1), reverse=True)

        d = []
        for k, v in sorted(final.iteritems()):
            d.append(get_one_line(k))
        final_d = u"\n".join(d)
#            d +=  u"%s\t%s\n" % (k, " ".join([u"%s/%s" % (tp[0], sorted(lemme_to_original[k][tp[0]], key=itemgetter(1), reverse=True)[0]) for tp in sorted([(k2, v2) for k2, v2 in v.iteritems()], key=itemgetter(1), reverse=True)]))
        ext = force and self.VALID_EXT or self.PENDING_EXT
        save_to_file("%s/lexicon%s" % (self.PATH, ext), unicode(final_d))
Example #42
0
def main():
    packages = remove_irrelevant_packages(get_top_packages(), int(TO_CHART * 1.05))
    packages = annotate_pep8(packages)
    packages = remove_irrelevant_packages(packages, TO_CHART)
    save_to_file(packages, 'results.json')
    generate_svg_wheel(packages, len(packages))
Example #43
0
 def export(cls, rules):
     """
     Rules are tuples (rule, score)
     """
     save_to_file("corpus/contextual_rules.pdg", 
                  "\n".join(rule for rule, score in rules))
Example #44
0
def main():
    packages = remove_irrelevant_packages(get_top_packages(), TO_CHART)
    annotate_wheels(packages)
    save_to_file(packages, 'results.json')