def fetch_election(year):
    '''
    Fetch all papers related to an election campaign; year should be one of:
    2016, 2012, 2008, 2004, 1960

    `year` may be given as a string or an int; it is converted to a string
    before being spliced into the page URL.

    This is a generator: it yields one paper (as returned by `fetch`) per
    document id found on each candidate's category pages, with an extra
    'category' key set to the category the paper was listed under.
    '''
    # Accept ints as well as strings: both the URL concatenation and the
    # '2008' comparison below need a string.
    year = str(year)
    year_html = get_html(base_url + '/' + year + '_election.php')
    if year == '2008':
        # fix weird issue in Fred Thompson's entry
        year_html = year_html.replace(
            'Status: withdrew on <span class="docdate">',
            'Status: <span class="docdate">withdrew on ')
    soup = BeautifulSoup(year_html)
    # All candidate cells live in the table that contains the first
    # `td.doctext` cell.
    container = soup.find('td', class_='doctext').find_parent('table')
    for td in container.find_all('td', class_='doctext'):
        paragraphs = td.find_all('p')
        if not paragraphs:
            # Cell without any <p> carries no candidate entry.
            continue
        # A candidate cell is expected to hold exactly two paragraphs
        # (anything else raises ValueError here, as before).
        info_paragraph, links_paragraph = paragraphs
        candidate = _get_candidate_info(info_paragraph)
        for category, category_url in _iter_candidate_categories(links_paragraph):
            logger.info('Fetching papers from category "%s"', category)
            category_soup = get_soup(category_url)
            for pid in _get_pids(category_soup):
                paper = fetch(pid)
                if candidate['name'] != paper['author']:
                    logger.warning(
                        'candidate name "%s" does not match paper author "%s" (%s)',
                        candidate['name'], paper['author'], pid)
                paper['category'] = category
                yield paper
def setup_keypair(self):
    """
    Import the configured SSH public key via `self.import_key_pair_from_file`.

    The key name and public key path are read from
    `self.provider_dict['ssh']`. Two failure modes are tolerated:

    - `NotImplementedError` from the import call: logged as a warning.
    - Any exception whose message starts with 'InvalidKeyPair.Duplicate':
      silently ignored.

    :raises Exception: every other exception is re-raised unchanged.
    """
    ssh_conf = self.provider_dict['ssh']
    try:
        self.import_key_pair_from_file(
            name=ssh_conf['key_name'],
            key_file_path=ssh_conf['public_key_path']
        )
    except NotImplementedError:
        # Best effort only: not being able to import a key is not fatal here.
        logger.warning(
            '`import_key_pair_from_file` not implemented for {}'.format(self.provider_dict['provider']['name']))
    except Exception as e:
        # `.message` is absent on Python 3 exceptions; fall back to str(e) so
        # that this check cannot itself raise AttributeError and hide `e`.
        message = getattr(e, 'message', None) or str(e)
        if not message.startswith('InvalidKeyPair.Duplicate'):
            # Bare `raise` keeps the original traceback.
            raise
def set_clusters(self, within):
    """
    Run `add_to_cluster` for every cluster registered under the directory
    that `get_directory` resolves for `within`.

    :param within: passed through to `self.setup_connection_meta` and
        `self.get_directory`.
    :return: dict filled in by the `add_to_cluster` calls, or None when
        `self.node` is falsy (nothing is done in that case).

    Side effect: the result is merged into `self.previous_clustering_results`
    when that is already truthy, otherwise it becomes that attribute.
    """
    if not self.node:
        logger.warning('No node, skipping')
        return

    self.setup_connection_meta(within)
    directory = self.get_directory(self.process_dict, within)

    res = {}
    for cluster in self.process_dict['register'][directory]:
        # `add_to_cluster` records its output by mutating `res` in place.
        self.add_to_cluster(cluster, res)

    if self.previous_clustering_results:
        self.previous_clustering_results.update(res)
    else:
        self.previous_clustering_results = res
    return res
def handle_deprecations(func_names):
    """
    Log a single deprecation warning when `func_names` contains any of the
    legacy step names (`install`, `setup`, `serve`, `start`).

    The previous implementation tried to warn only once but its counter was
    never incremented, so it warned once per legacy name. It also located
    names with a binary search, which is only valid on lexicographically
    sorted input. Plain membership testing has neither problem.

    :param func_names: iterable of function-name strings.
    :return: None
    """
    deprecated_func_names = frozenset(('install', 'setup', 'serve', 'start'))
    if any(func_name in deprecated_func_names for func_name in func_names):
        logger.warning('Deprecation: use function names ending in numerals instead')
def provision(self, create_or_deploy):
    """
    Create (or deploy) the node described by `self.node_specs`.

    Steps, in order:
      1. Call `self.setup_keypair()`; a `LibcloudError` is logged, not raised.
      2. When `create_or_deploy == 'deploy'`, read the public key at
         `self.provider_dict['ssh']['public_key_path']` and add it to
         `self.node_specs` as an `SSHKeyDeployment` under 'deploy'.
      3. Set `self.node_name` from `self.strategy.get_node_name()`.
      4. Call `self.<create_or_deploy>_node(name=..., **self.node_specs)` and
         store the result in `self.node`. If deploying is not implemented by
         the driver, fall back to `self.create_node`.

    :param create_or_deploy: prefix of the `*_node` method to call,
        e.g. 'create' or 'deploy'.
    :raises NotImplementedError: when the `*_node` call is not implemented
        and the `create_node` fallback does not apply.
    :raises SystemExit: with code 1 when the error message starts with
        'InvalidGroup.NotFound'.

    NOTE(review): when the `*_node` call raises `LibcloudError` the error is
    only logged and `self.node` is left as it was - confirm callers cope.
    """
    try:
        self.setup_keypair()
    except LibcloudError as e:
        # `.message` is absent on Python 3 exceptions; fall back to str(e).
        logger.warning('{cls}: {msg}'.format(
            cls=e.__class__.__name__, msg=getattr(e, 'message', None) or str(e)))

    if 'ex_securitygroup' in self.node_specs and self.provider_dict['provider']['name'].startswith('EC2'):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.node_specs['ex_securitygroup'])

    if create_or_deploy == 'deploy':
        with open(self.provider_dict['ssh']['public_key_path'], mode='rt') as f:
            public_ssh_key = f.read()
        self.node_specs.update({'deploy': SSHKeyDeployment(public_ssh_key)})

    self.node_name = self.strategy.get_node_name()
    try:
        node_factory = getattr(self, '{0}_node'.format(create_or_deploy))
        self.node = node_factory(name=self.node_name, **self.node_specs)
    except NotImplementedError as e:
        if create_or_deploy != 'deploy':
            raise  # bare raise keeps the original traceback

        error_message = 'deploy_node not implemented for this driver'
        if (getattr(e, 'message', None) or str(e)) != error_message:
            raise

        logger.info('{error_message}, so running `create_node` instead.'.format(
            error_message=error_message.replace('deploy_node', '`deploy_node`')
        ))
        self.node = self.create_node(name=self.node_name, **self.node_specs)
    except LibcloudError as e:
        logger.warning('{cls}: {msg}'.format(
            cls=e.__class__.__name__, msg=getattr(e, 'message', None) or str(e)))
    except Exception as e:
        if not (getattr(e, 'message', None) or str(e)).startswith('InvalidGroup.NotFound'):
            raise
        print('InvalidGroup.NotFound')
        # Same effect as the `exit(1)` site helper, without relying on `site`.
        raise SystemExit(1)
def add_to_cluster(self, cluster, res):
    """
    Specification:
      0. Search and handle `master` tag in `cluster_name`
      1. Imports `cluster_name`, seeks and sets (`install` xor `setup`) and (`serve` or `start`) callables
      2. Installs `cluster_name`
      3. Serves `cluster_name`

    :param cluster: mapping with a required 'module' key and optional
        'args', 'kwargs', 'cluster_name' and 'run_cmds' keys. 'run_cmds' may
        hold 'exclude' (names to skip) and/or 'op' + 'val' (keep only steps
        whose trailing number compares true against `val`).
    :param res: dict mutated in place; step outputs are stored at
        `res[self.dns_name][cluster_path][step]`.
    :raises ImportError: when the module has no attribute for the guessed OS.
    :raises AttributeError: when the module has neither numbered steps nor
        the legacy `install`/`setup` and `serve`/`start` functions.
    """

    def trailing_number(name):
        # Digits at the end of `name` (read back-to-front, then un-reversed).
        return ''.join(takewhile(str.isdigit, name[::-1]))[::-1]

    args = cluster.get('args', tuple())
    kwargs = update_d({
        'domain': self.dns_name,
        'node_name': self.node_name,
        'public_ipv4': self.node.public_ips[-1],
        'cache': {},
        'cluster_name': cluster.get('cluster_name')
    }, cluster.get('kwargs', {}))

    cluster_type = cluster['module'].replace('-', '_')
    # cluster_path is built before any `:tag` suffix is stripped below.
    cluster_path = '/'.join(part for part in (cluster_type, kwargs['cluster_name']) if part)
    kwargs.update(cluster_path=cluster_path)

    if ':' in cluster_type:
        cluster_type, _, tag = cluster_type.rpartition(':')
    else:
        tag = None
    kwargs.update(tag=tag)
    if tag == 'master':
        kwargs.update(master=True)

    if hasattr(self.node, 'private_ips') and len(self.node.private_ips):
        kwargs.update(private_ipv4=self.node.private_ips[-1])

    guessed_os = self.guess_os()

    # import `cluster_type`
    # NOTE(review): level=-1 is the Python 2 implicit-relative import mode.
    try:
        self.fab = getattr(__import__(cluster_type, globals(), locals(), [guessed_os], -1), guessed_os)
    except AttributeError as e:
        # `.message` is absent on Python 3 exceptions; fall back to str(e).
        if (getattr(e, 'message', None) or str(e)) != \
                "'module' object has no attribute '{os}'".format(os=guessed_os):
            raise
        raise ImportError('Cannot `import {os} from {cluster_type}`'.format(os=guessed_os,
                                                                             cluster_type=cluster_type))

    fab_dir = dir(self.fab)
    # Sort functions like so: `step0`, `step1`
    func_names = sorted(
        (j for j in fab_dir if not j.startswith('_') and str.isdigit(j[-1])),
        key=lambda s: int(trailing_number(s) or -1)
    )

    if 'run_cmds' in cluster:
        run_cmds = cluster['run_cmds']
        comparators = {'>=': operator.ge, '<': operator.lt,
                       '>': operator.gt, '<=': operator.le,
                       '==': operator.eq, '!=': operator.ne}

        if 'exclude' in run_cmds:
            excluded = run_cmds['exclude']
            func_names = tuple(name for name in func_names if name not in excluded)

        # Only filter by number when an operator is given; a `run_cmds` with
        # just `exclude` previously died with KeyError: 'op'.
        if 'op' in run_cmds:
            compare = comparators[run_cmds['op']]
            threshold = int(run_cmds['val'])
            func_names = [name for name in func_names
                          if compare(int(trailing_number(name)), threshold)]

    if not func_names:
        # No numbered steps: fall back to the legacy function names.
        def first_available(a, b):
            if hasattr(self.fab, a):
                return a
            if hasattr(self.fab, b):
                return b
            return raise_f(AttributeError, '`{a}` nor `{b}`'.format(a=a, b=b))

        try:
            func_names = (
                first_available('install', 'setup'),
                first_available('serve', 'start')
            )
        except AttributeError as e:
            logger.error('{e} found in {cluster_type}'.format(e=e, cluster_type=cluster_type))
            raise AttributeError(
                'Function names in {cluster_type} must end in a number'.format(cluster_type=cluster_type)
            )  # 'must'!

        logger.warning('Deprecation: Function names in {cluster_type} should end in a number'.format(
            cluster_type=cluster_type)
        )

    self.handle_deprecations(func_names)

    for idx, step in enumerate(func_names):
        exec_output = execute(getattr(self.fab, step), *args, **kwargs)[self.dns_name]

        if idx == 0:
            # Start a fresh entry for this cluster_path but keep whatever
            # other clusters already recorded for this node; assigning a
            # whole new dict to res[self.dns_name] used to drop them.
            res.setdefault(self.dns_name, {})[cluster_path] = {step: exec_output}
            if tag == 'master':
                save_node_info('master', [self.node_name], folder=cluster_type, marshall=json)
        else:
            res[self.dns_name][cluster_path][step] = exec_output

    save_node_info(self.node_name, node_to_dict(self.node), folder=cluster_path, marshall=json)