def _main(self, ip, seconds, options=None):
    ipadr = ip
    options = self.get_options() if not options else options
    conf = deepcopy(settings.STANDALONE_DEFAULTS)
    cconf = {
        "id": options.get("--service-id", None),
        "name": "blipp",
        "properties": {
            "configurations": {
                "unis_url": options.get("--unis-url", None),
            }
        }
    }
    delete_nones(cconf)
    merge_dicts(conf, cconf)

    if options['--config-file']:
        fconf = self.get_file_config(options['--config-file'], ip)
        merge_dicts(conf, fconf)

    bconf = BlippConfigure(initial_config=conf,
                           node_id=options['--node-id'],
                           pre_existing_measurements=options['--existing'],
                           urn=options['--urn'])
    bconf.initialize()
    # EK: don't need to refresh right away, right?
    # bconf.refresh()
    config = bconf.config
    logger.info('main', config=pprint.pformat(config))
    # logger.warn('NODE: ' + HOSTNAME, config=pprint.pformat(config))
    time.sleep(seconds)
    arbiter.main(bconf, ip)
def get_capabilities(self, metadata, bbox):
    with open("templates/getcapabilities.xml") as f:
        root = objectify.parse(f).getroot()

    with open("templates/featuretype.xml") as f:
        featureTypeInfo = merge_dicts(
            metadata,
            {
                "minx": bbox[0],
                "miny": bbox[1],
                "maxx": bbox[2],
                "maxy": bbox[3],
            },
            {"name": self.make_wfs_safe_layername(metadata["name"])})
        featureType = tostring(parse(f).getroot()).decode("utf-8")
        featureType = featureType.format(**featureTypeInfo)
        root.FeatureTypeList.append(fromstring(featureType))

    getCaps = tostring(root).decode("utf-8")
    getCaps = getCaps.format(**merge_dicts(
        self.service_info,
        {
            "API_URL": self.api_url,
            "COGJ_URL": urllib.quote(self.service_info["COGJ_URL"])
        }))
    return getCaps
def extract_marks(self):
    # To be really correct, must handle "spec", "repeat", etc.,
    # but this should be good enough for the charts we deal with
    self.marks = {}
    encoding_stack = []
    if "encoding" in self.spec:
        encoding_stack.append(self.spec["encoding"])
    if "mark" in self.spec:
        if isinstance(self.spec["mark"], str):
            self.marks[self.spec["mark"]] = encoding_stack[-1]
        else:
            self.marks[self.spec["mark"]["type"]] = encoding_stack[-1]
    if "layer" in self.spec:
        for layer in self.spec["layer"]:
            if "encoding" in layer:
                encoding_stack.append(layer["encoding"])
            if "mark" in self.spec:
                if isinstance(self.spec["mark"], str):
                    self.marks[self.spec["mark"]] = utils.merge_dicts(encoding_stack)
                else:
                    self.marks[self.spec["mark"]["type"]] = utils.merge_dicts(encoding_stack)
            if "encoding" in layer:
                encoding_stack.pop()
def get_commands_dict(self, user):
    commands = self.commands_public.copy()
    if user in self.regulars + self.moderators + self.admin:
        commands = merge_dicts(commands, self.commands_regulars)
    if user in self.moderators + self.admin:
        commands = merge_dicts(commands, self.commands_moderators)
    if user in self.admin:
        commands = merge_dicts(commands, self.commands_private)
    return commands
def plot(label, num_filled, freq, num_shells_range):
    method_blacklist = [
        "imsrg[f]+eom[n]",
        "magnus_quads+eom",
        "fci",
        "hf",
    ]
    if label != "ground":
        method_blacklist.append("imsrg")
    fit_data = fits.load_fit_data(label)
    d = utils.load_all()
    d = utils.filter_preferred_ml(d)
    d = d[~d["method"].isin(method_blacklist) &
          (d["interaction"] == "normal") &
          (d["label"] == label) &
          (d["num_filled"] == num_filled) &
          (d["num_shells"] >= num_shells_range[0]) &
          (d["num_shells"] <= num_shells_range[1]) &
          (d["freq"] == freq)]
    num_particles = num_filled * (num_filled + 1)
    energy_type = {
        "ground": "ground state",
        "add": "addition",
        "rm": "removal",
    }[label]
    fig, ax = plt.subplots(1, 2)
    fig.set_size_inches((6.5, 2.3))
    base_markersize = 5
    xc = np.linspace(num_shells_range[0] - 0.5, num_shells_range[1] + 0.5, 200)
    for method, case in d.groupby("method"):
        case = case.sort_values("num_shells")
        xs = case["num_shells"].astype(int)
        ys = case["energy"]
        marker = utils.METHOD_MARKER[method]
        style = {
            "marker": marker,
            "markerfacecolor": "none",
            "markersize": (utils.MARKERSIZE_CORRECTION.get(marker, 1.0) *
                           base_markersize),
            "color": utils.METHOD_COLOR[method],
            "label": utils.METHOD_LABEL[method],
        }
        xms = 0.5 * (xs[1:] + xs[:-1])
        yps = abs(ys.diff() / xs.diff())
        ax[0].plot(xms, yps, linestyle="none", **style)
        ax[1].plot(xs, ys, linestyle="none", **style)
        p = fits.get_fit_params(fit_data, np.max(xs), num_particles, freq, method)
        ax[0].plot(xc,
                   (abs(p["coefficient"] * p["exponent"]) *
                    xc**(p["exponent"] - 1)),
                   **utils.merge_dicts(style, {"marker": ""}))
        ax[1].plot(xc,
                   p["coefficient"] * xc**p["exponent"] + p["constant"],
                   **utils.merge_dicts(style, {"marker": ""}))
def create_json_object(raw_csv_data: RawCSVData, final_json_schema, schema_elements: {}):
    final_output = []
    for i, entry in enumerate(raw_csv_data.values):
        next_dict = {}
        merge_dicts(next_dict, final_json_schema)
        populate_schema(entry, next_dict, schema_elements)
        final_output.append(next_dict)
    return final_output
def cross_slot_reduce(self, per_slot_metrics: List) -> Any:
    val_correct_by_param, test_correct_by_param, test_ct = zip(*per_slot_metrics)
    val_correct_by_param = reduce(
        lambda x, y: merge_dicts(x, y, lambda a, b: a + b), val_correct_by_param
    )
    test_correct_by_param = reduce(
        lambda x, y: merge_dicts(x, y, lambda a, b: a + b), test_correct_by_param
    )
    test_ct = sum(test_ct)

    max_val_param = max(val_correct_by_param, key=val_correct_by_param.get)
    return test_correct_by_param[max_val_param] / test_ct
def update_aliases(self):
    """Regenerate the alias database."""
    if 'aliases' not in self.db or type(self.db['aliases']) is not dict:
        self.db['aliases'] = {}
    d = {}
    try:
        d = ast.literal_eval(open(locs.userdata + '/aliases.py').read())
    except FileNotFoundError:
        pass
    self.aliasdb = utils.merge_dicts(d, self.db['aliases'])
    for m in self.modules:
        self.aliasdb = utils.merge_dicts(self.aliasdb, m.aliases)
def patch_vault(self, userid, vault):
    """
    Save vault for specified user (PATCH)

    Args:
        userid: json file to load
        vault: updated vault to save
    """
    current_vault = self.get_vault(userid)
    if current_vault:
        merge_dicts(current_vault, vault)
        self.vaults[userid] = current_vault
        return self.vaultdb.put_item(current_vault)
    return {'error': 'Vault not found'}
def update(
    self,
    val_correct_by_param: Dict[Any, int],
    test_correct_by_param: Dict[Any, int],
    test_ct: int,
) -> None:
    self.val_correct_by_param = merge_dicts(
        self.val_correct_by_param, val_correct_by_param, lambda x, y: x + y
    )
    self.test_correct_by_param = merge_dicts(
        self.test_correct_by_param, test_correct_by_param, lambda x, y: x + y
    )
    self.test_ct += test_ct
def test_merge_dict():
    compare_dicts(merge_dicts({}, {}), {})
    compare_dicts(merge_dicts({'def': 'ault'}, {}), {'def': 'ault'})
    compare_dicts(merge_dicts({'def': 'ault'}, {'def': 'other'}),
                  {'def': 'other'})
    compare_dicts(
        merge_dicts({'nested': {'def': 'ault', 'other': 'option'}},
                    {'nested': {'def': 'other'}}),
        {'nested': {'def': 'other', 'other': 'option'}})
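The snippets in this collection assume slightly different merge_dicts helpers: most treat later dictionaries as overrides and merge nested dictionaries recursively (the behaviour exercised by test_merge_dict above), some pass an optional combiner for conflicting values (cross_slot_reduce and update above), and a few rely on an in-place variant that mutates its first argument (the blipp and SnmpBooster snippets). As a point of reference only, a minimal non-mutating sketch consistent with the test above and the combiner call sites could look like the following; it is inferred from these usages, not the implementation used by any of the projects shown.

def merge_dicts(a, b, combine=None):
    """Return a new dict with b merged into a (illustrative sketch only).

    Nested dicts are merged recursively; for conflicting non-dict values,
    b wins unless a combine(a_value, b_value) callable is given.
    """
    merged = dict(a)
    for key, b_value in b.items():
        a_value = merged.get(key)
        if isinstance(a_value, dict) and isinstance(b_value, dict):
            # both sides are dicts: merge them key by key
            merged[key] = merge_dicts(a_value, b_value, combine)
        elif key in merged and combine is not None:
            # conflicting plain values with an explicit combiner
            merged[key] = combine(a_value, b_value)
        else:
            # key only in b, or conflict without a combiner: b wins
            merged[key] = b_value
    return merged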
def process(self, image, **args):
    '''The process function must return a 2-element tuple: the first element
    is the output image, the second is a dictionary of other variables that
    can be used later in the stack.
    '''
    self._input = merge_dicts({'image': image}, args)
    if hasattr(self, '_prehooks'):
        for prehook in self._prehooks:
            prehook(self)
    image_out, args_out = self._process(image, **args)
    self._output = merge_dicts({'image': image_out}, args_out)
    if hasattr(self, '_poshooks'):
        for poshook in self._poshooks:
            poshook(self)
    return (image_out, args_out)
class CurrentStudentRoster(Base):
    __tablename__ = 'ss_current'
    __table_args__ = merge_dicts(base_table_args, {"schema": "matviews"})

    site_id = foreignkey(Sites.site_id, primary_key=True)
    student_id = foreignkey(Students.student_id, primary_key=True)
    roster_site = relationship("Sites")
    roster_student = relationship("Students")
def items(self):
    items = merge_dicts(*[
        account.positions.items()
        for account in six.itervalues(self._accounts)
    ])
    for k in sorted(items.keys()):
        yield k, items[k]
def extract(self, options=None):
    # Figure out if specific or general url
    # /game/{platform}/{game name} is specific, otherwise /game/{game name}
    is_specific = re.search(r'http://www\.mobygames\.com/game/[a-z0-9\-]+/[a-z0-9\-_]+',
                            self.source.url)
    if is_specific:
        main_url = is_specific.group()
    else:
        main_url = re.search(r'http://www\.mobygames\.com/game/[a-z0-9\-_]+',
                             self.source.url).group()
    main_page = self.get_page(main_url)
    credits_page = self.get_page(main_url + '/credits')
    release_page = self.get_page(main_url + '/release-info')
    specs_page = self.get_page(main_url + '/techinfo')
    rating_page = self.get_page(main_url + '/rating-systems')
    extracted_info = merge_dicts(MobyGamesExtractor.scrape_main_page(main_page),
                                 MobyGamesExtractor.scrape_credit_page(credits_page),
                                 MobyGamesExtractor.scrape_release_page(release_page),
                                 MobyGamesExtractor.scrape_specs_page(specs_page),
                                 MobyGamesExtractor.scrape_rating_page(rating_page))
    html_data = [x.text for x in
                 [y for y in (main_page, credits_page, release_page, specs_page, rating_page) if y]]
    now = datetime.now(tz=pytz.utc).isoformat()
    extracted_info['extracted_datetime'] = now
    # should this be the base main_url or the specific one slurped?
    extracted_info['source_uri'] = self.source.url
    extracted_info['source_file_hash'] = save_page_to_extract_store(self.source.url, now, html_data)
    self.extracted_info = extracted_info
def get_aliases(self):
    """Returns a dictionary of all current aliases."""
    channeld = {}
    if self.channel:
        channeld = self.channel.aliases
    return utils.merge_dicts(self.server.aliasdb, channeld)
def handle_imports(content, directory):
    import_files = content.get('import', [])
    if isinstance(import_files, basestring):
        import_files = [import_files]
    for fname in import_files[::-1]:
        import_path = os.path.abspath(os.path.join(directory, fname))
        print("importing: '%s'" % import_path)
        import_directory = os.path.dirname(import_path)
        with open(import_path) as f:
            import_content = handle_imports(yaml.load(f), import_directory)
        expand_periodic_fields(import_content)
        for wild_key in ('globals/*/fields', 'entities/*/fields'):
            multi_keys = expand_wild(wild_key, import_content)
            for multi_key in multi_keys:
                import_fields = multi_get(import_content, multi_key)
                local_fields = multi_get(content, multi_key, [])
                # fields are in "yaml ordered dict" format and we want
                # simple list of items
                import_fields = [d.items()[0] for d in import_fields]
                local_fields = [d.items()[0] for d in local_fields]
                # merge the lists
                merged_fields = merge_items(import_fields, local_fields)
                # convert them back to "yaml ordered dict"
                merged_fields = [{k: v} for k, v in merged_fields]
                multi_set(content, multi_key, merged_fields)
        content = merge_dicts(import_content, content)
    return content
def handle_imports(content, directory):
    import_files = content.get('import', [])
    if isinstance(import_files, basestring):
        import_files = [import_files]
    for fname in import_files[::-1]:
        import_path = os.path.join(directory, fname)
        print("importing: '%s'" % import_path)
        import_directory = os.path.dirname(import_path)
        with open(import_path) as f:
            import_content = handle_imports(yaml.load(f), import_directory)
        expand_periodic_fields(import_content)
        for wild_key in ('globals/*/fields', 'entities/*/fields'):
            multi_keys = expand_wild(wild_key, import_content)
            for multi_key in multi_keys:
                import_fields = multi_get(import_content, multi_key)
                local_fields = multi_get(content, multi_key, [])
                # fields are in "yaml ordered dict" format and we want
                # simple list of items
                import_fields = [d.items()[0] for d in import_fields]
                local_fields = [d.items()[0] for d in local_fields]
                # merge the lists
                merged_fields = merge_items(import_fields, local_fields)
                # convert them back to "yaml ordered dict"
                merged_fields = [{k: v} for k, v in merged_fields]
                multi_set(content, multi_key, merged_fields)
        content = merge_dicts(import_content, content)
    return content
def wrapped(self, *args, **kwargs):
    # temporary dictionary
    self.temp = {}
    # call the function to do the processing
    func(self, *args, **kwargs)
    # merge temporary and processed dicts
    self.processed = merge_dicts(self.processed, self.temp)
def update_service(self, host, service, data, force=False):
    """ This function updates/inserts a service

    * It is used by Arbiter in hook_late_configuration to put the
      configuration in the database
    * It is used by Poller to put collected data in the database

    The 'force' flag is used to overwrite the service data (used in the
    cache manager).

    Return
    * query_result: None
    * error: bool
    """
    # Get key
    key = self.build_key(host, service)
    if not force:
        old_dict = self.db_conn.get(key)
        if old_dict is not None:
            old_dict = eval(old_dict)
        # Merge old data and new data
        data = merge_dicts(old_dict, data)

    if data is None:
        return (None, True)

    # Save in redis
    try:
        self.db_conn.set(key, data)
    except Exception as exp:
        logger.error("[SnmpBooster] [code 1304] [%s, %s] "
                     "%s" % (host, service, str(exp)))
        return (None, True)

    return (None, False)
def setup_order_modify_cancel(kiteconnect, variety):
    symbol = params["exchange"] + ":" + params["tradingsymbol"]
    ltp = kiteconnect.ltp(symbol)

    updated_params = utils.merge_dicts(params, {
        "product": kiteconnect.PRODUCT_MIS,
        "variety": variety,
        "order_type": kiteconnect.ORDER_TYPE_LIMIT
    })

    diff = ltp[symbol]["last_price"] * 0.01
    updated_params["price"] = ltp[symbol]["last_price"] - (diff - (diff % 1))

    order_id = kiteconnect.place_order(**updated_params)

    # delay order fetch so order is not in received state
    time.sleep(0.5)

    order = kiteconnect.order_history(order_id)
    status = order[-1]["status"].upper()

    if not is_pending_order(status):
        warnings.warn(UserWarning("Order is not open with status: ", status))
        return

    return (updated_params, order_id, order)
def wrap_up_extraction(self, d):
    if d['status'] == 'finished':
        if platform.system() == 'Windows':
            filename = d['filename'].split('\\')[-1].rpartition('.')[0]  # Flimsy for now
            filename_with_ext = d['filename'].split('\\')[-1]
        else:
            filename = d['filename'].split('/')[-1].rpartition('.')[0]  # Flimsy for now
            filename_with_ext = d['filename'].split('/')[-1]

        hash = save_file_to_store(os.path.join(TEMP_DIRECTORY, filename_with_ext))
        hash_dir = os.path.join(LOCAL_CITATION_DATA_STORE, hash)
        shutil.copy2(os.path.join(TEMP_DIRECTORY, "{}.description".format(filename)), hash_dir)
        shutil.copy2(os.path.join(TEMP_DIRECTORY, "{}.info.json".format(filename)), hash_dir)
        shutil.copy2(os.path.join(TEMP_DIRECTORY, "{}.annotations.xml".format(filename)), hash_dir)

        with open(os.path.join(TEMP_DIRECTORY, "{}.info.json".format(filename))) as json_file:
            info_json = json.load(json_file)

        extracted_info = {}
        extracted_info['source_uri'] = self.source.url
        extracted_info['source_file_hash'] = hash
        extracted_info['extracted_datetime'] = datetime.now(tz=pytz.utc).isoformat()
        extracted_info['source_file_name'] = filename_with_ext
        # Currently merging everything, might want to be more discriminate
        # info_json['title'] -> extracted_info['title']
        extracted_info = merge_dicts(info_json, extracted_info)

        # Clean up tmp directory
        shutil.rmtree(TEMP_DIRECTORY)

        # Signal complete
        self.extracted_info = extracted_info
def set_user_config(config):
    user_cfg = dict()
    user_cfg["user_vars"] = dict()
    if os.path.exists("config_user.jsc"):
        user_cfg = json.loads(open("config_user.jsc", "r").read())
    if utils.get_platform_name() == "win32":
        set_user_config_vs_version(user_cfg["user_vars"])
        set_user_config_vc_vars(user_cfg["user_vars"])
        set_user_config_winsdk_version(user_cfg["user_vars"])
    # merge user_config into config
    if os.path.exists("config_user.jsc"):
        user_cfg = json.loads(open("config_user.jsc", "r").read())
        utils.merge_dicts(config, user_cfg)
def _process(self, image, **args):
    assert 'segments' in args
    segments = args['segments']
    if len(segments) == 0:
        return (image, args)
    good_area = (segments[:, 2] * segments[:, 3]) >= self.min_area
    result = segments[good_area]
    return (image, merge_dicts(args, {'segments': result}))
def _parse_permissions_files(self, filenames):
    permissions = {}
    for item in filenames.split(','):
        filename = item.strip()
        if filename:
            with open(filename, 'r') as data_file:
                permissions = utils.merge_dicts(permissions, yaml.load(data_file))
    return permissions
def _setup_service(self):
    config = self.config
    logger.debug('_setup_service', config=pprint.pformat(config))
    r = None
    if config.get("id", None):
        r = self.unis.get("/services/" + config["id"])
    if not r:
        logger.warn('_setup_service', msg="service id not specified or not found "
                                          "unis instance ...querying for service")
        rlist = self.unis.get("/services?name=" + config.get("name", None) +
                              "&runningOn.href=" + config["runningOn"]["href"] + "&limit=2")
        # loop over the returned services and find one that
        # doesn't return 410, see
        # https://uisapp2.iu.edu/jira-prd/browse/GEMINI-98
        if rlist:
            for i in range(len(rlist)):
                r = self.unis.get('/services/' + rlist[i]["id"])
                if r:
                    if isinstance(r, list):
                        logger.warn('_setup_service', msg="id not unique... taking first result")
                        r = r[0]
                    logger.info('_setup_service',
                                msg="%s service found with id %s" % (config["name"], r["id"]))
                    break
        else:
            logger.warn('_setup_service', msg="no service found by id or querying "
                                              "...creating new service")

    if r:
        merge_dicts(config, r)

    # always update UNIS with the merged config
    if config.get("id", None):
        r = self.unis.put("/services/" + config["id"], data=config)
    else:
        r = self.unis.post("/services", data=config)
    if r:
        merge_dicts(config, r)

    if r:
        self.service_setup = True
    else:
        logger.warn('_setup_service', msg="unable to set up service in UNIS")
def expand_all_info(host):
    _fields = IPMI._fields
    ipmi_info = gen_dict(_fields, host.ipmi.__dict__)
    host_info = {
        "hostname": host.hostname,
        "host_uuid": host.host_uuid,
        "tags": [tag.tag_name for tag in host.tag_set.all()]
    }
    return merge_dicts(ipmi_info, host_info)
def _parse_quota_files(self, filenames_spec):
    self.quota_data = {}
    filenames = filenames_spec.split(',')
    for item in filenames:
        filename = item.strip()
        if filename:
            with open(filename, 'r') as data_file:
                self.quota_data = utils.merge_dicts(self.quota_data, yaml.load(data_file))
    return self.quota_data
def load_config_files(cls):
    import utils
    root_cfg = cls._load_route_config()
    all_cfg = {}
    if root_cfg is not None and 'installed_apps' in root_cfg:
        for app in root_cfg['installed_apps']:
            app_cfg = cls._load_app_config(app)
            if app_cfg:
                all_cfg = dict(all_cfg.items() + app_cfg.items())
    return utils.merge_dicts(all_cfg, root_cfg)
def _process(self, image, **args):
    assert 'segments' in args
    segments = args['segments']
    if len(segments) == 0:
        return (image, args)
    good_min_ratio = segments[:, 3] >= self.min_h_w_ratio * segments[:, 2]
    good_max_ratio = segments[:, 3] <= self.max_h_w_ratio * segments[:, 2]
    result = segments[good_min_ratio * good_max_ratio]
    return (image, merge_dicts(args, {'segments': result}))
def _process(self, image, **args):
    assert 'segments' in args
    segments = args['segments']
    if len(segments) == 0:
        return (image, args)
    m = contained_segments_matrix(segments)
    # keep only segments that are not contained in any other segment
    no_contain = ~numpy.max(m, axis=1)
    result = segments[no_contain]
    return (image, merge_dicts(args, {'segments': result}))
def _process(self, image, **args):
    assert 'segments' in args
    segments = args['segments']
    if len(segments) == 0:
        return (image, args)
    good_width = segments[:, 2] <= self.max_width
    good_height = segments[:, 3] <= self.max_height
    result = segments[good_width * good_height]  # AND
    return (image, merge_dicts(args, {'segments': result}))
def _process(self, image, **args):
    image_input = image
    image = image.copy()
    contours, hierarchy = cv2.findContours(image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    segments = segments_to_numpy([cv2.boundingRect(c) for c in contours])
    if len(segments) <= 1:
        segments = numpy.empty((0, 4), dtype=int)
    self.contours, self.hierarchy = contours, hierarchy  # store, may be needed for debugging
    return (image_input, merge_dicts(args, {'segments': segments}))
def _process(self, image, **args):
    assert 'segments' in args
    assert 'regions' in args
    segments = args['segments']
    regions = args['regions']
    # regions produced by feature_extractor use a different dimension ordering
    # than the lasagne network expects; adapt it here
    regions = numpy.rollaxis(regions, 3, 1)
    segment_types = numpy.argmax(self.predict_fn(regions), axis=1)
    return (image, merge_dicts(args, {'segment_types': segment_types}))
def build_json_schema_object_hierarchy(schema_elements: []):
    final_schema = {}
    for element in schema_elements:
        if element.exclusion_type != ExclusionType.ALWAYS_EXCLUDE:
            if (not element.ignore_object_delimiter
                    and const.OBJECT_DELIMITER in element.original_name):
                delimited_heading: [] = element.original_name.split(const.OBJECT_DELIMITER)
                update_element_output_name_with_object_delimiter(delimited_heading, element)
                current_dict = defaultdict()
                build(current_dict, delimited_heading, element)
                merge_dicts(final_schema, current_dict)
                continue
            final_schema[element.output_name] = element.default_value
            element.json_key = element.output_name
    return final_schema
def dot_arctic_phase_helper(self):
    dxl_defaults = {'dxlclient': '/etc/dxlclient.config'}
    watch_defaults = {
        'watch': '/var/log/suricata/files/',
        'existing': False,
        'sandbox': False,
        'sandboxconfig': '~/.robust'
    }
    config = ConfigParser.ConfigParser({})
    fname = os.path.expanduser("~/.opendxl-arctic-phase")
    if os.path.isfile(fname):
        config.read(fname)
        if config.has_section("opendxl"):
            opendxl = self.config_section_map(config, "opendxl", dxl_defaults)
            dot_arctic_phase_opendxl = {'dxlclient': opendxl["dxlclient"]}
        else:
            dot_arctic_phase_opendxl = dxl_defaults
        if config.has_section("watch"):
            watch = self.config_section_map(config, "watch", watch_defaults)
            dot_arctic_phase_watch = {
                'watch': watch["watch"],
                'existing': watch["existing"],
                'sandbox': watch["sandbox"],
                'sandboxconfig': watch["sandboxconfig"]
            }
        else:
            dot_arctic_phase_watch = watch_defaults
        # config file present, merge sections
        dot_arctic_phase_dict = utils.merge_dicts(dot_arctic_phase_opendxl,
                                                  dot_arctic_phase_watch)
    else:
        # No config file, just merge default dicts
        dot_arctic_phase_dict = utils.merge_dicts(dxl_defaults, watch_defaults)
    return dot_arctic_phase_dict
def __ver_object(request, pk, obj_klass, template_name, list_url):
    obj = get_object_or_404(obj_klass, pk=pk)
    form_klass = obj_klass.__name__ + "VerForm"
    constructor = globals()[form_klass]
    form = constructor(request.POST or None, request.FILES or None, instance=obj)

    aux = []
    if isinstance(obj, Tecnologia):
        aux = TecnologiaAnexo.objects.select_related().filter(tecnologia=obj)

    x = {'form': form, 'object': obj, 'aux': aux}
    perms = __perms_dict(request)
    return render(request, template_name, merge_dicts(x, perms))
def __init__(self, serial_device='/dev/ttyUSB0', serial_baudrate=115200, serial_timeout=0.5,
             comm_method='serial', ip_address=None, port=5000, tcp_timeout=2.0,
             inputs: dict = None, input_default=None):
    """Constructor

    :param str serial_device: The serial device to use (if comm_method=='serial').
        Default is '/dev/ttyUSB0'.
    :param int serial_baudrate: Serial baudrate of the device. Default is 115200.
    :param float serial_timeout: Timeout for serial operations (if comm_method=='serial').
        Default is 0.5.
    :param str comm_method: Communication method. Supported values are 'serial' and 'tcp'.
        Default is 'serial'.
    :param str ip_address: IP address of the device (if comm_method=='tcp').
    :param int port: Port to connect to (if comm_method=='tcp'). Default is 5000.
    :param float tcp_timeout: Timeout for socket operations (if comm_method=='tcp').
        Default is 2.0. (Lesser values have been problematic with this device.)
    :param dict inputs: Custom mapping of input names to numbers.
    :param str input_default: The default input (if any) to select after setup
    """
    try:
        self._power_status = None
        self._input_status = None
        self._av_mute = None

        if comm_method == 'serial':
            self.comms = self.Comms()
            self.comms.serial_device = serial_device
            self.comms.serial_baudrate = serial_baudrate
            self.comms.serial_timeout = serial_timeout
            self.comms.connection = Serial(port=serial_device, baudrate=serial_baudrate,
                                           timeout=serial_timeout)
            self.comms.connection.close()
        elif comm_method == 'tcp' and ip_address is not None:
            self.comms = self.Comms()
            self.comms.tcp_ip_address = ip_address
            self.comms.tcp_port = port
            self.comms.tcp_timeout = tcp_timeout
            self.comms.connection = create_connection((ip_address, port), timeout=tcp_timeout)
            self.comms.connection.close()

        # get custom input mapping
        if inputs and isinstance(inputs, dict):
            self.inputs = merge_dicts(inputs, self._default_inputs)
        else:
            self.inputs = self._default_inputs

        self._input_default = input_default
        if input_default:
            self.select_input(input_default)

    except Exception as e:
        logger.error('__init__(): Exception occurred: {}'.format(e.args), exc_info=True)
        sys.exit(1)
def _process(self, image, **args):
    assert 'segments' in args
    assert 'regions' in args
    segments = args['segments']
    regions = args['regions']
    if 'segment_types' in args:
        segment_types = args['segment_types'].copy()
    else:
        segment_types = numpy.repeat(10, len(segments))

    self.x1, self.y1 = segments[:, 0], segments[:, 1]
    self.x2, self.y2 = self.x1 + segments[:, 2], self.y1 + segments[:, 3]
    self.s = segments[:, 2] * segments[:, 3]
    self._output = {'segments': segments, 'segment_types': segment_types}

    global refPt
    refPt = None

    # keep looping until the escape key is pressed
    cv2.namedWindow("norm", cv2.WINDOW_NORMAL)
    cv2.setMouseCallback("norm", self.click_handler)
    cv2.namedWindow("selected_region", cv2.WINDOW_NORMAL)
    classifying = True
    selected_segment = -1
    while classifying:
        # display the image and wait for a keypress
        self.display()
        key = cv2.waitKey(1) & 0xFF
        # if the escape key is pressed, break from the loop
        if key == 27:
            classifying = False
            break
        if refPt is not None:
            selected_segment = self.get_selected_segment(refPt)
            refPt = None
        if selected_segment >= 0:
            cv2.imshow("selected_region", regions[selected_segment])
            char = chr(key)
            key = 0
            if char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
                digit = int(char)
                self.update_click_class(selected_segment, digit)
                selected_segment = -1
            elif char == 'r':
                digit = 10
                self.update_click_class(selected_segment, digit)
                selected_segment = -1
    cv2.destroyAllWindows()
    return (image, merge_dicts(args, {'segment_types': segment_types}))
def main(options=None):
    options = get_options() if not options else options
    logger = settings.get_logger('blippd', options['--log'], options['--log-level'])
    conf = deepcopy(settings.STANDALONE_DEFAULTS)
    cconf = {
        "id": options.get("--service-id", None),
        "name": "blipp",
        "properties": {
            "configurations": {
                "unis_url": options.get("--unis-url", None),
            }
        }
    }
    delete_nones(cconf)
    merge_dicts(conf, cconf)

    if options['--config-file']:
        fconf = get_file_config(options['--config-file'])
        merge_dicts(conf, fconf)

    bconf = BlippConfigure(initial_config=conf,
                           node_id=options['--node-id'],
                           pre_existing_measurements=options['--existing'],
                           urn=options['--urn'])
    bconf.initialize()
    config = bconf.config
    logger.info('main', config=pprint.pformat(config))
    logger.warn('NODE: ' + HOSTNAME, config=pprint.pformat(config))

    if options['--daemonize']:
        with daemon.DaemonContext():
            arbiter.main(bconf)
    else:
        arbiter.main(bconf)
def _setup(self, config):
    import tensorflow as tf
    self.target_timesteps = 1
    logger.warning('Starting experiment')
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not isinstance(config['env_name'], list):
        config['env_name'] = [config['env_name']]

    self.dconfig = dconfig = utils.DotDict(config)

    self.summary_writer = self.find_tf_logger() or tf.summary.FileWriter(self.logdir)
    tflog_utils.log_text(self.summary_writer, 'config', str(dconfig))

    # Assign different environments to different agents
    env_count = len(config['env_name'])
    agent_configs = [
        utils.merge_dicts(config, {'env_name': config['env_name'][i % env_count]})
        for i in range(dconfig.agent_count)
    ]

    self.agents = [
        ray_workers.AgentWorker.remote(i, agent_configs[i], self.logdir)
        for i in range(dconfig.agent_count)
    ]
    logger.warning('Setting up agents')
    # [ray] There is no way to wait for the actors to finalize initialization,
    # thus we put this in a setup method.
    # Commented out because we call setup in the __init__ function of the agent worker:
    # ray.wait([agent.setup.remote() for agent in self.agents], num_returns=dconfig.agent_count)
    logger.warning('Created agents')

    if dconfig.restore_count:
        self._restore_from_specification(dconfig, agent_configs)

    # Create objective server and sync objective parameters
    if dconfig.agent_count > 1:
        params = self.agents[0].get_objective_params.remote()
        self.server = ray_workers.ObjectiveServer.remote(config, params)
        logger.warning('Created server')

        self.obj_param_count = len(ray.get(params))

        ray.wait([
            agent.update_objective_params.remote(params)
            for agent in self.agents[1:]
        ], num_returns=dconfig.agent_count - 1)
        logger.warning('Synced objective function')
def __adicionar_obj(request, form_klass, list_url, template_name):
    context = RequestContext(request)
    if request.method == 'POST':
        form = form_klass(request.POST, request.FILES)
        if form.is_valid():
            form.save(commit=True)
            return HttpResponseRedirect(reverse(list_url))
        # Not working with chained selects
        # else:
        #     print form
        #     print form.errors
    else:
        form = form_klass()

    x = {'form': form}
    perms = __perms_dict(request)
    return render_to_response(template_name, merge_dicts(x, perms), context)
def __edit_object(request, pk, obj_klass, template_name, list_url):
    obj = get_object_or_404(obj_klass, pk=pk)
    form_klass = obj_klass.__name__ + "Form"
    constructor = globals()[form_klass]
    form = constructor(request.POST or None, request.FILES or None, instance=obj)
    if form.is_valid():
        form.save()
        return HttpResponseRedirect(reverse(list_url))
    # Not working with chained selects
    # else:
    #     print form
    #     print form.errors

    aux = []
    if isinstance(obj, Tecnologia):
        aux = TecnologiaAnexo.objects.select_related().filter(tecnologia=obj)

    x = {'form': form, 'object': obj, 'aux': aux}
    perms = __perms_dict(request)
    return render(request, template_name, merge_dicts(x, perms))
def GET(self, action_url, url_params={}):
    """ Make GET request """
    url = self.base_url + "/" + action_url
    parsed_url = urlparse.urlparse(url)

    if parsed_url.scheme == 'https':
        connection = httplib.HTTPSConnection(parsed_url.hostname)
    else:
        connection = httplib.HTTPConnection(parsed_url.hostname)

    get_path = parsed_url.path
    query = urllib.urlencode(utils.merge_dicts(self.common_url_params, url_params))
    if query:
        get_path += "?" + query

    connection.request('GET', get_path)
    response = connection.getresponse()
    response_data = response.read()
    connection.close()
    return response_data
def _parse_outlinks(self):
    # links in hierarchical title and body
    dicts = [
        {'%s/relatedTo' % self.itemtype: [path[0] for path in self.paths[:-1]]},
        md_wikilink.parse_wikilinks(self.itemtype, WikiPage.remove_metadata(self.body)),
    ]

    # links in structured data
    for name, value in self.data.items():
        if type(value) is list:
            dicts += [self._schema_item_to_links(name, v) for v in value]
        else:
            dicts.append(self._schema_item_to_links(name, value))

    # merge
    merged = merge_dicts(dicts, force_list=True)

    # exclude links to this page
    return dict((k, v) for k, v in merged.items()
                if not ((type(v) == list and self.title in v) or self.title == v))
def analyze(self, again=False):
    """Runs all the analyses and creates a dictionary with all of the analyzed data."""
    if not self.analysis_finished or again:
        # This ungodly series of list comprehensions creates
        # a list of functions ordered in the sequence given by the decorators.
        ordered_func_list = [fname for order, fname in
                             sorted(self._analyses.items(), key=lambda item: item[0])]
        ordered_func_list = [getattr(self, fn) for fn in
                             reduce(list.__add__, ordered_func_list)]
        # Call all functions
        [function() for function in ordered_func_list]
        # Merge intermediates with original dictionary
        self.finished = merge_dicts(self.leaks, self.processed)
        self.analysis_finished = True
    else:
        # Don't re-analyze unless explicitly asked to
        error = "This trace has already been analyzed. " + \
                "If you want to re-run the analysis, please run analyze(again=True)."
        raise Exception(error)
def parse_data(cls, title, body, itemtype=u'Article'):
    body = body.replace('\r\n', '\n')

    default_data = {'name': title, 'schema': schema.get_itemtype_path(itemtype)}

    # collect
    yaml_data = cls.parse_schema_yaml(body)
    body_data = pairs_to_dict((m.group('name'), m.group('value'))
                              for m in re.finditer(cls.re_data, body))
    if itemtype == u'Article' or u'Article' in schema.get_schema(itemtype)[u'ancestors']:
        default_section = u'articleBody'
    else:
        default_section = u'longDescription'
    section_data = cls.parse_sections(body, default_section)

    # merge
    data = merge_dicts([default_data, yaml_data, body_data, section_data])

    # validation and type conversion
    typed = schema.SchemaConverter.convert(itemtype, data)

    return typed
def update_service(self, host, service, data, force=False):
    """ This function updates/inserts a service

    * It is used by Arbiter in hook_late_configuration to put the
      configuration in the database
    * It is used by Poller to put collected data in the database

    The 'force' flag is used to overwrite the service data (used in the
    cache manager).

    Return
    * query_result: None
    * error: bool
    """
    # Get key
    key = self.build_key(host, service)
    if not force:
        old_dict = self.db_conn.get(key)
        if old_dict is not None:
            old_dict = ast.literal_eval(old_dict)
        # Merge old data and new data
        data = merge_dicts(old_dict, data)

    if data is None:
        return (None, True)

    # Save in redis
    try:
        self.db_conn.set(key, data)
    except Exception as exp:
        logger.error("[SnmpBooster] [code 1304] [%s, %s] "
                     "%s" % (host, service, str(exp)))
        return (None, True)

    return (None, False)
# sunny_train_labels = np.array(_sunny_data["labels"])[_train_sunny_indices]
# sunny_validation_images = np.array(_sunny_data["images"])[_validation_sunny_indices]
# sunny_validation_labels = np.array(_sunny_data["labels"])[_validation_sunny_indices]

sunny_train_images = [None] * 1000
sunny_train_labels = [None] * 1000
sunny_validation_images = [None] * 1000
sunny_validation_labels = [None] * 1000

###########################
# Data form preprocessing #
###########################

_HOUGH_ROI_PATHS = (
    TEMP_FILES_PATH + 'pkl_train_slice2roi.pkl',
    TEMP_FILES_PATH + 'pkl_validate_slice2roi.pkl',
)

_hough_rois = utils.merge_dicts(map(_load_file, _HOUGH_ROI_PATHS))

##################################
# Methods for accessing the data #
##################################

_METADATA_ENHANCED_TAG = "META_ENHANCED"


def _is_enhanced(metadatadict):
    return metadatadict.get(_METADATA_ENHANCED_TAG, False)


def _tag_enhanced(metadatadict, is_enhanced=True):
    metadatadict[_METADATA_ENHANCED_TAG] = is_enhanced
def get_aliases(self):
    """Returns a dictionary of all current aliases."""
    return utils.merge_dicts(self.server.aliasdb)
def get_environment(self, configure=None, env=None):
    ''' Build (or return cached) environment with configure/env '''
    # set defaults, if none were passed in
    if configure is None:
        configure = self.default_configure
    else:
        configure = merge_dicts(self.default_configure, configure)
    if env is None:
        env = self.default_env
    else:
        env = merge_dicts(self.default_env, env)

    key = self._get_key(configure, env)

    # TODO: remove, this is a hack for local dev
    if key not in self.environment_stash:
        key = self.environment_stash.iterkeys().next()

    # if we don't have it built already, lets build it
    if key not in self.environment_stash:
        self.autoreconf()
        builddir = tempfile.mkdtemp()

        # configure
        args = [os.path.join(self.source_dir, 'configure'), '--prefix=/'] + configure_list(configure)
        run_sync_command(args,
                         cwd=builddir,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         )

        # make
        run_sync_command(['make', '-j'],
                         cwd=builddir,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         )
        installdir = tempfile.mkdtemp(dir=self.env_cache_dir)

        # make install
        run_sync_command(['make', 'install', 'DESTDIR={0}'.format(installdir)],
                         cwd=builddir,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         )

        shutil.rmtree(builddir)  # delete builddir, not useful after install
        # stash the env
        self.environment_stash[key] = installdir

    # create a layout
    layout = Layout(self.environment_stash[key])

    # return an environment cloned from that layout
    ret = Environment()
    ret.clone(layout=layout)
    return ret
def load_def(localdir, ent_name, section_def, required_fields):
    if 'type' in section_def and 'fields' in section_def:
        raise Exception("invalid structure for '%s': "
                        "type and fields sections are mutually exclusive"
                        % ent_name)

    if 'type' in section_def:
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        str_type = section_def['type']
        if isinstance(str_type, basestring):
            celltype = field_str_to_type(str_type, "array '%s'" % ent_name)
        else:
            assert isinstance(str_type, type)
            celltype = str_type
        return 'ndarray', load_ndarray(csv_filepath, celltype)

    fields_def = section_def.get('fields')
    if fields_def is not None:
        for fdef in fields_def:
            if isinstance(fdef, basestring):
                raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
        if all(isinstance(fdef, dict) for fdef in fields_def):
            fields = fields_yaml_to_type(fields_def)
        else:
            assert all(isinstance(fdef, tuple) for fdef in fields_def)
            fields = fields_def
    else:
        fields = None

    newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                           section_def.get('newnames', {}))
    transpose = section_def.get('transposed', False)
    interpolate_def = section_def.get('interpolate')
    files_def = section_def.get('files')
    if files_def is None:
        #XXX: it might be cleaner to use the same code path than for the
        # multi-file case (however, that would loose the "import any file
        # size" feature that I'm fond of.

        # we can simply return the stream as-is
        #FIXME: stream is not sorted
        # csv file is assumed to be in the correct order (ie by period then id)
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        csv_file = CSV(csv_filepath, newnames,
                       delimiter=',', transpose=transpose)
        if fields is not None:
            fields = required_fields + fields
        stream = csv_file.read(fields)
        if fields is None:
            fields = csv_file.fields
        if interpolate_def is not None:
            raise Exception('interpolate is currently only supported with '
                            'multiple files')
        return 'table', (fields, csv_file.numlines, stream, csv_file)
    else:
        # we have to load all files, merge them and return a stream out of that
        print(" * computing number of rows...")

        # 1) only load required fields
        default_args = dict(newnames=newnames, transpose=transpose)
        if isinstance(files_def, dict):
            files_items = files_def.items()
        elif isinstance(files_def, list) and files_def:
            if isinstance(files_def[0], dict):
                # handle YAML ordered dict structure
                files_items = [d.items()[0] for d in files_def]
            elif isinstance(files_def[0], basestring):
                files_items = [(path, {}) for path in files_def]
            else:
                raise Exception("invalid structure for 'files'")
        else:
            raise Exception("invalid structure for 'files'")

        #XXX: shouldn't we use the "path" defined for the whole entity if any?
        # section_def.get('path')
        files = []
        for path, kwargs in files_items:
            kwargs['newnames'] = \
                merge_dicts(invert_dict(kwargs.pop('oldnames', {})),
                            kwargs.get('newnames', {}))
            f = CSV(complete_path(localdir, path),
                    **merge_dicts(default_args, kwargs))
            files.append(f)
        id_periods = union1d(f.as_array(required_fields) for f in files)

        print(" * reading files...")
        # 2) load all fields
        if fields is None:
            target_fields = merge_items(*[f.fields for f in files])
            fields_per_file = [None for f in files]
        else:
            target_fields = required_fields + fields
            fields_per_file = [[(name, type_) for name, type_ in target_fields
                                if name in f.field_names]
                               for f in files]
            total_fields = set.union(*[set(f.field_names) for f in files])
            missing = set(name for name, _ in target_fields) - total_fields
            if missing:
                raise Exception("the following fields were not found in any "
                                "file: %s" % ", ".join(missing))

        total_lines = len(id_periods)

        # allocate main array
        target = np.empty(total_lines, dtype=np.dtype(target_fields))
        # fill with default values
        target[:] = tuple(missing_values[ftype] for _, ftype in target_fields)
        target['period'] = id_periods['period']
        target['id'] = id_periods['id']

        arrays = [f.as_array(fields_to_load)
                  for f, fields_to_load in zip(files, fields_per_file)]

        # close all files
        for f in files:
            f.close()

        #FIXME: interpolation currently only interpolates missing data points,
        # not data points with their value equal the missing value
        # corresponding to the field type. This can only be fixed once
        # booleans are loaded as int8.
        if interpolate_def is not None:
            if any(v != 'previous_value' for v in interpolate_def.itervalues()):
                raise Exception("currently, only 'previous_value' "
                                "interpolation is supported")
            to_interpolate = [k for k, v in interpolate_def.iteritems()
                              if v == 'previous_value']
        else:
            to_interpolate = []

        interpolate(target, arrays, id_periods, to_interpolate)
        return 'table', (target_fields, total_lines, iter(target), None)
def add_aliases(self, aliases):
    """ Add multiple aliases with format: 'name': 'content' """
    self.aliases = utils.merge_dicts(self.aliases, aliases)
def scrape_release_page(page_data):
    main_dict = {}
    if not page_data:
        return main_dict
    b = bs4.BeautifulSoup(page_data.text, 'html.parser')

    # Try to get the first h2's parent, only h2's on release page are platform names
    # If not present, there is no release information
    try:
        release_div = b.find('h2').parent
    except AttributeError:
        return main_dict

    main_dict['release_platforms'] = {}

    # Releases are a straight list of divs without much identifying information,
    # thus this organization instead of a rather large for loop and condition mess
    header_rules = (('header', 'attr'), ('header', 'rel_info'))
    attr_rules = (('attr', 'attr'), ('attr', 'rel_info'))
    patch_rules = (('patch', 'patch_rel_info'),)
    patch_rel_info_rules = (('patch_rel_info', 'patch_rel_info'),)
    rel_info_rules = (('rel_info', 'rel_info'), ('rel_info', 'patch'))
    index_rule = (('rel_info', 'attr'), ('rel_info', 'header'), ('patch_rel_info', 'header'))
    stop_rule = (('attr', None), ('rel_info', None), ('patch_rel_info', None))

    def create_div_tuple(div):
        if div.name == u'h2':
            # ('header', platform name)
            return 'header', replace_xa0(div.text)
        elif div.name == u'b':
            # ('patch', None)
            return 'patch', None
        elif 'class' in div.attrs and u'relInfo' in div['class']:
            # ('patch_rel_info', {relInfoTitle: relInfoDetails, ...})
            return 'patch_rel_info', dict([(replace_xa0(div.find(class_='relInfoTitle').text),
                                            replace_xa0(div.find(class_='relInfoDetails').text))])
        elif 'class' in div.attrs and u'floatholder' in div['class']:
            # ('attr', {attr_name: [value, ...]})
            return 'attr', {snake_case(replace_xa0(div.find(class_='fl').text)):
                            [a.text for a in div.find_all('a')]}
        elif div.find(class_='relInfo'):
            # ('rel_info', {relInfoTitle: relInfoDetails, ...})
            return 'rel_info', dict([(replace_xa0(r.find(class_='relInfoTitle').text),
                                      replace_xa0(r.find(class_='relInfoDetails').text))
                                     for r in div.find_all(class_='relInfo')])
        else:
            return None, None

    # Pages with patch histories insert newline characters that convert to
    # NavigableStrings, don't want those
    release_divs = map(create_div_tuple,
                       [c for c in release_div.children if not isinstance(c, bs4.NavigableString)])

    release_platform = None
    rel_dict = {'releases': []}
    for first, second in pairwise_overlap(release_divs):
        rule = (first[0], second[0])
        if rule in header_rules:
            # Start a new listing for platform
            release_platform = first[1]
            main_dict['release_platforms'][release_platform] = []
        elif rule in patch_rules:
            # Add key for patch history
            rel_dict['patch_history'] = []
        elif rule in attr_rules:
            # Add key / value to release dict
            rel_dict = merge_dicts(rel_dict, first[1])
        elif rule in rel_info_rules:
            # Add new relInfo to release dict
            rel_dict['releases'].append(first[1])
        elif rule in patch_rel_info_rules:
            # Add patch relInfo to release dict
            rel_dict['patch_history'].append(first[1])
        elif rule in index_rule:
            # Add new relInfo to current release dict and start new one
            if rule[0] == 'rel_info':
                rel_dict['releases'].append(first[1])
            elif rule[0] == 'patch_rel_info':
                rel_dict['patch_history'].append(first[1])
            main_dict['release_platforms'][release_platform].append(rel_dict)
            rel_dict = {'releases': []}
        elif rule in stop_rule:
            # Out of release information divs, clean up and stop processing
            if rule[0] == 'attr':
                rel_dict = merge_dicts(rel_dict, first[1])
            elif rule[0] == 'rel_info':
                rel_dict['releases'].append(first[1])
            elif rule[0] == 'patch_rel_info':
                rel_dict['patch_history'].append(first[1])
            main_dict['release_platforms'][release_platform].append(rel_dict)
            break

    return main_dict
def model_fields(self, **kwargs):
    applicants = merge_dicts(self.__dict__, kwargs)
    return {k: v for k, v in applicants.iteritems()
            if k in self.model._meta.fields and k not in ['id', 'created_at']}
import utils

ITEM_ID_NUMBERS = utils.merge_dicts(
    {
        0x0000: "Null",  # (used for UCMM messages). Indicates that encapsulation routing is NOT needed. Target is either local (ethernet) or routing info is in a data Item.
        0x000C: "ListIdentity Response",
        0x0091: "Reserved",  # for legacy (RA)
        0x00A1: "Connected Address Item",  # (used for connected messages)
        0x00B1: "Connected Data Item",  # Connected Transport packet
        0x00B2: "Unconnected Data Item",  # Unconnected Messages (eg. used within CIP command SendRRData)
        0x0100: "ListServices response",
        0x8000: "Sockaddr Info, originator-to-target",
        0x8001: "Sockaddr Info, target-to-originator",
        0x8002: "Sequenced Address item",
    },
    {k: "Reserved for legacy (RA)" for k in range(0x0001, 0x000B + 1)},       # 0x0001 – 0x000B Reserved for legacy (RA)
    {k: "Reserved for legacy (RA)" for k in range(0x000D, 0x0083 + 1)},       # 0x000D – 0x0083 Reserved for legacy (RA)
    {k: "Reserved for future expansion" for k in range(0x0084, 0x0090 + 1)},  # 0x0084 – 0x0090 Reserved for future expansion
    {k: "Reserved for future expansion" for k in range(0x0092, 0x00A0 + 1)},  # 0x0092 – 0x00A0 Reserved for future expansion
    {k: "Reserved for legacy (RA)" for k in range(0x00A2, 0x00A4 + 1)},       # 0x00A2 – 0x00A4 Reserved for legacy (RA)
    {k: "Reserved for future expansion" for k in range(0x00A5, 0x00B0 + 1)},  # 0x00A5 – 0x00B0 Reserved for future expansion
    {k: "Reserved for future expansion" for k in range(0x00B3, 0x00FF + 1)},  # 0x00B3 – 0x00FF Reserved for future expansion
    {k: "Reserved for legacy (RA)" for k in range(0x0101, 0x010F + 1)},       # 0x0101 – 0x010F Reserved for legacy (RA)
    {k: "Reserved for future expansion" for k in range(0x0110, 0x7FFF + 1)},  # 0x0110 – 0x7FFF Reserved for future expansion
    {k: "Reserved for future expansion" for k in range(0x8003, 0xFFFF + 1)},  # 0x8003 – 0xFFFF Reserved for future expansion
    # regexps used to produce the dicts above:
    # (0x[\d|{ABCDF}]{4}).{3}(0x[\d|{ABCDF}]{4}) (.*)
    # \{k\: \"$3\" for k in range\($1\, $2 \+ 1\)\}\, \#
)
def fit_one(self, data, model_y, model_stereo):
    event_ids = numpy.unique(data.EventID.values)

    if self.train_size is not None:
        event_ids_train, event_ids_test = train_test_split(event_ids,
                                                           train_size=self.train_size,
                                                           random_state=42)
    else:
        event_ids_test = event_ids

    # fit train tracks
    if self.train_size is not None:
        tracks_train = {}
        p = Pool(self.processes)
        results_train = p.map(tracks_reconstruction,
                              zip(event_ids_train,
                                  [data] * len(event_ids_train),
                                  [model_y] * len(event_ids_train),
                                  [model_stereo] * len(event_ids_train)))
        tracks_train = merge_dicts(results_train)

    # train clf
    if self.train_size is not None:
        sc = SuperCombinator()

        combination_data = sc.data_collection(tracks_train, data)

        X_data = combination_data[combination_data.columns[:-1]].values
        y_data = combination_data.label.values

        xgb_base = XGBoostClassifier(n_estimators=1000, colsample=0.7, eta=0.01,
                                     nthreads=1, subsample=0.7, max_depth=8)
        folding = FoldingClassifier(xgb_base, n_folds=10, random_state=11)
        folding.fit(X_data, y_data)

        clf = folding.estimators[0]
    else:
        clf = None

    # fit test tracks
    tracks_test = {}
    p = Pool(self.processes)
    results_test = p.map(tracks_reconstruction,
                         zip(event_ids_test,
                             [data] * len(event_ids_test),
                             [model_y] * len(event_ids_test),
                             [model_stereo] * len(event_ids_test)))
    tracks_test = merge_dicts(results_test)

    # quality
    p = Pool(self.processes)
    effs = p.map(get_eff_value,
                 zip(event_ids_test,
                     [data] * len(event_ids_test),
                     [tracks_test] * len(event_ids_test),
                     [clf] * len(event_ids_test)))

    eff = 100. * numpy.array(effs).sum() / len(effs)

    return eff
import utils
import numpy as np
import cPickle as pickle

hardware_label, baseball_label, religion_label = 4, 10, 20

# get the most frequent 50 non-stop words from each newsgroup
# note: train and test were for classification task and irrelevant for our purpose,
# and thus we simply merge them
path2dir = '20news-bydate-matlab/matlab/'
train_vocabmap = utils.get_word_count(path2dir + 'train.data', path2dir + 'train.label',
                                      [hardware_label])
test_vocabmap = utils.get_word_count(path2dir + 'test.data', path2dir + 'test.label',
                                     [hardware_label])
htop50w, htop50id = utils.get_top_words(utils.merge_dicts(train_vocabmap, test_vocabmap),
                                        'vocabulary.txt')

train_vocabmap = utils.get_word_count(path2dir + 'train.data', path2dir + 'train.label',
                                      [baseball_label])
test_vocabmap = utils.get_word_count(path2dir + 'test.data', path2dir + 'test.label',
                                     [baseball_label])
btop50w, btop50id = utils.get_top_words(utils.merge_dicts(train_vocabmap, test_vocabmap),
                                        'vocabulary.txt')

train_vocabmap = utils.get_word_count(path2dir + 'train.data', path2dir + 'train.label',
                                      [religion_label])
test_vocabmap = utils.get_word_count(path2dir + 'test.data', path2dir + 'test.label',
                                     [religion_label])
rtop50w, rtop50id = utils.get_top_words(utils.merge_dicts(train_vocabmap, test_vocabmap),
                                        'vocabulary.txt')

# find the union of all word ids; this is less than 150.
allwids = list(set(htop50id).union(set(btop50id)).union(set(rtop50id)))

# create document term (count) matrices from our choice of words
hdocmat_train = utils.get_docmat(allwids, path2dir + 'train.data', path2dir + 'train.label',
                                 [hardware_label])
hdocmat_test = utils.get_docmat(allwids, path2dir + 'test.data', path2dir + 'test.label',
                                [hardware_label])
hdocmat = np.concatenate([hdocmat_train, hdocmat_test])
np.savetxt('hardware_data.txt', hdocmat, fmt='%d')