def run(self):
    """run the search"""
    # load the maze from file
    maze = visualization.MazeLoader(self.maze_name)
    maze.load()
    start = maze.get_start()
    goal = maze.get_goal()
    walls = maze.get_walls()
    print("walls******")
    print(walls)

    # create a world from user input which defines how states evolve,
    # which states are valid and how cost is assigned
    world_class = util.get_class(world, self.world)
    world_instance = world_class(walls)

    # create visualizer to visualize the search problem and solution
    visualizer = visualization.Visualizer(start, goal, walls)

    # create a search problem to run the search on
    search_problem_class = util.get_class(search_problem, self.search_problem)
    search_problem_instance = search_problem_class(start, goal, world_instance, visualizer)

    # pick the search algorithm to use
    search_fn = util.get_class(search_method, self.search_method)
    action_plan = search_fn(search_problem_instance)

    # visualize the solution to the search algorithm
    visualizer.set_action_plan(action_plan)
    visualizer.show()
def run(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        option_string: typing.Optional[str]):
    '''
    Execution of the action.

    :param parser: The argument parser in use.
    :param namespace: The namespace for parsed args.
    :param values: Values for the action.
    :param option_string: Option string.
    '''
    build_config = config.get_build_config(
        path=namespace.path, image_name=values)
    LOGGER.debug(build_config)
    builder = util.get_class(
        package='builder',
        module=namespace.builder,
        name=namespace.builder)()
    builder.build(
        namespace=namespace, image=values, build_config=build_config)
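# The argparse actions in this collection resolve their collaborators through
# util.get_class(package=..., module=..., name=...). The project's own helper is not shown here;
# the sketch below is a minimal illustration, assuming it imports '<package>.<module>' and returns
# the attribute called 'name':
import importlib


def get_class(package: str, module: str, name: str):
    """Import '<package>.<module>' and return its attribute named 'name' (illustrative sketch)."""
    mod = importlib.import_module('{}.{}'.format(package, module))
    return getattr(mod, name)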
def get(source_name, config):
    """ Get instance of data source implementation """
    import util
    return util.get_class(__name__, source_name, Source)(config)
def get(source_name, config):
    """ Get instance of notifier implementation """
    import util
    return util.get_class(__name__, source_name, Notifier)(config)
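# The get(source_name, config) factories here pass a module name, a class name and an expected
# base class to util.get_class. The real helper is not part of this collection; this is a minimal
# sketch, assuming it looks the class up by name in the given module and verifies the subclass
# relationship (illustrative only):
import importlib


def get_class(module_name, class_name, base_class):
    """Return class 'class_name' from 'module_name', checking it derives from 'base_class'."""
    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)
    if not (isinstance(cls, type) and issubclass(cls, base_class)):
        raise TypeError('{} is not a subclass of {}'.format(class_name, base_class.__name__))
    return cls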
def flatten(self, relative=False):
    '''
    Flatten all sub_levels of the current FILE.

    The returned list has the following structure:
    [{'orm': <FILE>(1131598333513092212, sd_0010_plt_bga_v0004, VERSION),
      'sub_level': u'fullres/exr',
      'file': SequentialFiles(filename=DiskPath(u'/Volumes/filedata/tech/publish/dayu/sequence/sd/sd_0010/element/plt/sd_0010_plt_bga/sd_0010_plt_bga_v0004/fullres/exr/sd_0010_plt_bga_v0004.%04d.exr'),
                              frames=[1001, 1002, 1003, 1004, 1005, 1006],
                              missing=[])},
     {...}]

    :param relative: bool, whether each file's filename should be a relative path only
    :return: list
    '''
    import util
    file_table = util.get_class('file')
    if not isinstance(self, file_table):
        return []

    older_files = self.parent.sub_files.filter(file_table.name <= self.name).all()
    # older_files = [x for x in all_versions if x.name <= self.name]
    temp_flatten_dict = dict()
    for v in older_files:
        current_version_path = v.disk_path('publish')
        for s in v.sub_level.walk(collapse=True, relative=relative):
            sub_level_key = '/'.join(
                s.filename.replace(current_version_path, '').split('/')[:-1]).strip('/')
            temp_flatten_dict.update(
                {sub_level_key: {'file': s, 'orm': v, 'sub_level': sub_level_key}})
    return temp_flatten_dict.values()
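# Hypothetical usage of flatten(); 'file_orm' stands for any FILE instance and the dict keys
# follow the docstring above (names illustrative):
# for entry in file_orm.flatten(relative=True):
#     print(entry['sub_level'], entry['file'].frames, entry['orm'].name)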
def run(self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        option_string: typing.Optional[str]):
    '''
    :param parser: The argument parser in use.
    :param namespace: The namespace for parsed args.
    :param values: Values for the action.
    :param option_string: Option string.
    '''
    build_config = config.get_build_config(path=namespace.path, image_name=values)
    tag_build = build_config.get_tag_build()
    if tag_build is None:
        LOGGER.info('no tag_build configured')
        return

    version_from = tag_build.version_from
    if version_from:
        LOGGER.info('using tag_build for %s', version_from.type.value)
        LOGGER.debug(version_from)
        repository = util.get_class(
            package='version_finder',
            module=version_from.type.value,
            name=version_from.type.value)(version_from=version_from)
        versions = repository.get_all(
            first_versions=namespace.first_versions)
        for v in versions.stable or []:
            LOGGER.info(f'stable: {v}')
        for v in versions.unstable or []:
            LOGGER.info(f'unstable: {v}')
def main():
    '''Prints field list to stdout'''
    usage = "( python -m osdcquery.fieldlist | %prog ) [options] url"
    parser = OptionParser(usage=usage)
    shared_options(parser)
    (options, args) = parser.parse_args()

    logger = get_simple_logger(options.loglevel, options.verbose)

    num_args = 1
    if len(args) != num_args:
        parser.error("incorrect number of arguments")

    url = args[0]

    settings = importlib.import_module(options.config)

    field_list_class = get_class(settings.field_module_name,
                                 settings.field_class_name)
    field_list = field_list_class(url)

    fields = field_list.attributes()
    for field in fields:
        print "--%s" % field,
def get(source_name, config):
    """ Get instance of parser implementation """
    import util
    return util.get_class(__name__, source_name, Parser)(config)
def get_sql_attributes(class_name_or_property):
    atts = class_name_or_property.split('.')
    db_class = util.get_class(atts[0])
    if len(atts) == 1:
        return {key: {'type': str(value._orig_columns[0].type)
                              if type(value) == ColumnProperty
                              else 'relation',
                      'join': True if type(value) == RelationshipProperty else False,
                      'table': None if type(value) == ColumnProperty
                               else value.mapper.class_.__tablename__}
                for key, value in inspect(db_class).attrs.items()}
    else:
        current_class = db_class
        for x in atts[1:]:
            orm_property = inspect(current_class).attrs.get(x, None)
            if type(orm_property) == ColumnProperty:
                return None
            if orm_property is None:
                raise Exception('no such attr')
            current_class = orm_property.mapper.class_
        return {key: {'type': str(value._orig_columns[0].type)
                              if type(value) == ColumnProperty
                              else 'relation',
                      'join': True if type(value) == RelationshipProperty else False,
                      'table': None if type(value) == ColumnProperty
                               else value.mapper.class_.__tablename__}
                for key, value in inspect(current_class).attrs.items()}
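# Illustrative calls, assuming a 'folder' table with a 'created_by' relationship to a user table
# (table and column names are hypothetical):
# get_sql_attributes('folder')             -> {'name': {'type': 'VARCHAR', 'join': False, 'table': None}, ...}
# get_sql_attributes('folder.created_by')  -> attribute info of the related user table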
def get(source_name, config):
    """ Get instance of data source implementation """
    import util
    return util.get_class(__name__, source_name, Source)(config)
def integrator(self):
    """
    Create an integrator based on what the config file specified.

    The resulting integrator is initialized with this system, and whatever
    additional arguments were specified in the config.
    """
    integrator_type = util.get_class(self.config[INTEGRATOR])
    return integrator_type(self, *self.config[INTEGRATOR_ARGS])
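# integrator() assumes config[INTEGRATOR] holds an importable class path and
# config[INTEGRATOR_ARGS] the extra positional arguments. A hypothetical config fragment
# (path and values are illustrative only):
# config[INTEGRATOR] = "dms.integrators.LangevinIntegrator"
# config[INTEGRATOR_ARGS] = [300.0, 0.002]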
def __init__(self, fid, mode="r"):
    """
    Create a system object

    Args:
        fid: file name of a configuration hdf file
        mode: the mode to open the file, defaults to "r" - read only.
              For running a simulation, it should be "a" - Read/write if exists.
              For debugging, mode should be 'd', this mode opens the file in
              read/write mode but does not clear any timesteps.
    """
    logging.info("creating System, fid={}".format(fid))

    fmode = 'a' if (mode == 'a' or mode == 'd') else 'r'
    self.hdf = h5py.File(fid, fmode)
    self.config = self.hdf[CONFIG].attrs
    self._box = self.config[BOX]

    # if there is a current timestep, keep it around for debugging purposes
    if self.hdf.id.links.exists(CURRENT_TIMESTEP):
        print("WARNING, found previous \"current_timestep\" key, this means that a previous simulation likely crashed")
        logging.warn("found previous \"current_timestep\" key, this means that a previous simulation likely crashed")

    # load the universe object from either the last timestep, or from the src_files
    # its expensive to create a universe, so keep it around for the lifetime
    # of the system
    self.universe = self._create_universe()

    # load the subsystems
    # this list will remain constant as long as the topology remains constant.
    logging.info("creating subsystems")
    factory = util.get_class(self.config[SUBSYSTEM_FACTORY])
    self.ncgs, self.subsystems = factory(self, self.config[SUBSYSTEM_SELECTS],
                                         *(self.config[INTEGRATOR_ARGS].tolist() +
                                           self.config[SUBSYSTEM_ARGS].tolist()))
    logging.debug("using {} cg variables for each {} subsystems".format(self.ncgs, len(self.subsystems)))

    # notify subsystems, we have a new universe
    [s.universe_changed(self.universe) for s in self.subsystems]

    md_nensemble = self.config[MULTI]

    # number of data points in trajectory, md steps / output interval
    md_nsteps = int(self.config[MD_STEPS]) / int(self.md_args[NSTXOUT])

    # number of subsystems
    nrs = len(self.subsystems)

    # cg: nensembe x n segment x n_step x n_cg
    self.cg_positions = zeros((md_nensemble, nrs, md_nsteps, self.ncgs))
    self.cg_forces = zeros((md_nensemble, nrs, md_nsteps, self.ncgs))
    self.cg_velocities = zeros((md_nensemble, nrs, md_nsteps, self.ncgs))

    logging.info("pos {}".format(self.cg_positions.shape))
    logging.info("frc {}".format(self.cg_forces.shape))
    logging.info("vel {}".format(self.cg_velocities.shape))
def init_components():
    """Init hackathon factory"""
    from hackathon.user import UserManager, UserProfileManager
    from hackathon.hack import HackathonManager, AdminManager, TeamManager, DockerHostManager, \
        AzureCertManager, RegisterManager, HackathonTemplateManager, Cryptor
    from hackathon.template import TemplateLibrary
    from hackathon.remote.guacamole import GuacamoleInfo
    from hackathon.cache.cache_mgr import CacheManagerExt
    from hackathon.hazure.azure_formation import AzureFormation

    # dependencies MUST be provided in advance
    factory.provide("util", Utility)
    factory.provide("log", log)
    init_db()

    # hazure
    factory.provide("azure_formation", AzureFormation)

    # utils
    init_voice_verify()
    init_sms()
    factory.provide("email", Email)

    # cache
    factory.provide("cache", CacheManagerExt)

    # scheduler
    factory.provide("scheduler", scheduler)

    # business components
    factory.provide("user_manager", UserManager)
    factory.provide("user_profile_manager", UserProfileManager)
    factory.provide("hackathon_manager", HackathonManager)
    factory.provide("register_manager", RegisterManager)
    factory.provide("azure_cert_manager", AzureCertManager)
    factory.provide("cryptor", Cryptor)
    factory.provide("docker_host_manager", DockerHostManager)
    factory.provide("hackathon_template_manager", HackathonTemplateManager)
    factory.provide("template_library", TemplateLibrary)
    factory.provide("admin_manager", AdminManager)
    factory.provide("team_manager", TeamManager)
    factory.provide("guacamole", GuacamoleInfo)

    # experiment starter
    init_expr_components()

    # health check items
    factory.provide("health_check_hosted_docker", get_class("hackathon.health.health_check.HostedDockerHealthCheck"))
    factory.provide("health_check_alauda_docker", get_class("hackathon.health.health_check.AlaudaDockerHealthCheck"))
    factory.provide("health_check_guacamole", get_class("hackathon.health.health_check.GuacamoleHealthCheck"))
    factory.provide("health_check_azure", get_class("hackathon.health.health_check.AzureHealthCheck"))
    factory.provide("health_check_mongodb", get_class("hackathon.health.health_check.MongoDBHealthCheck"))

    # docker
    factory.provide("hosted_docker_proxy", get_class("hackathon.docker.hosted_docker.HostedDockerFormation"))
    factory.provide("alauda_docker_proxy", get_class("hackathon.docker.alauda_docker.AlaudaDockerFormation"))

    # storage
    init_hackathon_storage()
def process(self):
    '''Pull every website's data and return the completed Product.'''
    for website in settings.COMPANIES_TO_PROCESS:
        website = get_class(website)
        website_product = website(
            self.sese_name, self.sese_category, self.sese_organic)
        website_product.get_and_set_product_information()
        attributes = website_product.get_company_attributes()
        self._add_companys_attributes(website.ABBREVIATION, attributes)
    return self
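# process() expects settings.COMPANIES_TO_PROCESS to list class paths that get_class can resolve,
# each class exposing an ABBREVIATION attribute. A hypothetical settings entry (paths illustrative):
# COMPANIES_TO_PROCESS = ['scrapers.johnnys.JohnnysSeeds', 'scrapers.fedco.FedcoSeeds']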
def createMapper(self):
    if self.mapperSupported():
        mapper_class_name = 'mappers.Mapper{0}'.format(self.mapperType)
        MapperClass = get_class(mapper_class_name)
        if MapperClass:
            mc = MapperClass(self.nes)
            return mc
        else:
            print "Mapper class {0} not found.".format(mapper_class_name)
            return None
    else:
        return None
def init_components():
    """Init hackathon factory"""
    from hackathon.database import db_session
    from hackathon.database.db_adapters import SQLAlchemyAdapter
    from hackathon.user import UserManager, UserProfileManager
    from hackathon.hack import HackathonManager, AdminManager, TeamManager, DockerHostManager, \
        AzureCertManager, RegisterManager, HackathonTemplateManager
    from hackathon.template import TemplateLibrary
    from hackathon.remote.guacamole import GuacamoleInfo
    from hackathon.expr.expr_mgr import ExprManager
    from hackathon.cache.cache_mgr import CacheManagerExt

    # dependencies MUST be provided in advance
    factory.provide("util", Utility)
    factory.provide("log", log)
    factory.provide("db", SQLAlchemyAdapter, db_session)

    # utils
    init_voice_verify()
    init_sms()
    factory.provide("email", Email)

    # cache
    factory.provide("cache", CacheManagerExt)

    # scheduler
    factory.provide("scheduler", scheduler)

    # business components
    factory.provide("user_manager", UserManager)
    factory.provide("user_profile_manager", UserProfileManager)
    factory.provide("hackathon_manager", HackathonManager)
    factory.provide("register_manager", RegisterManager)
    factory.provide("azure_cert_manager", AzureCertManager)
    factory.provide("docker_host_manager", DockerHostManager)
    factory.provide("hackathon_template_manager", HackathonTemplateManager)
    factory.provide("template_library", TemplateLibrary)
    factory.provide("expr_manager", ExprManager)
    factory.provide("admin_manager", AdminManager)
    factory.provide("team_manager", TeamManager)
    factory.provide("guacamole", GuacamoleInfo)

    # health check items
    factory.provide("health_check_mysql", get_class("hackathon.health.health_check.MySQLHealthCheck"))
    factory.provide("health_check_hosted_docker", get_class("hackathon.health.health_check.HostedDockerHealthCheck"))
    factory.provide("health_check_alauda_docker", get_class("hackathon.health.health_check.AlaudaDockerHealthCheck"))
    factory.provide("health_check_guacamole", get_class("hackathon.health.health_check.GuacamoleHealthCheck"))
    factory.provide("health_check_azure", get_class("hackathon.health.health_check.AzureHealthCheck"))

    # docker
    factory.provide("hosted_docker", get_class("hackathon.docker.hosted_docker.HostedDockerFormation"))
    factory.provide("alauda_docker", get_class("hackathon.docker.alauda_docker.AlaudaDockerFormation"))

    # storage
    init_hackathon_storage()
def init_components():
    """Init hackathon factory"""
    from hackathon.user import UserManager, UserProfileManager
    from hackathon.hack import HackathonManager, AdminManager, TeamManager, DockerHostManager, \
        AzureCertManager, RegisterManager, HackathonTemplateManager, Cryptor
    from hackathon.template import TemplateLibrary
    from hackathon.remote.guacamole import GuacamoleInfo
    from hackathon.cache.cache_mgr import CacheManagerExt

    # dependencies MUST be provided in advance
    factory.provide("util", Utility)
    factory.provide("log", log)
    init_db()

    # utils
    init_voice_verify()
    init_sms()
    factory.provide("email", Email)

    # cache
    factory.provide("cache", CacheManagerExt)

    # scheduler
    factory.provide("scheduler", scheduler)

    # business components
    factory.provide("user_manager", UserManager)
    factory.provide("user_profile_manager", UserProfileManager)
    factory.provide("hackathon_manager", HackathonManager)
    factory.provide("register_manager", RegisterManager)
    factory.provide("azure_cert_manager", AzureCertManager)
    factory.provide("cryptor", Cryptor)
    factory.provide("docker_host_manager", DockerHostManager)
    factory.provide("hackathon_template_manager", HackathonTemplateManager)
    factory.provide("template_library", TemplateLibrary)
    factory.provide("admin_manager", AdminManager)
    factory.provide("team_manager", TeamManager)
    factory.provide("guacamole", GuacamoleInfo)

    # experiment starter
    init_expr_components()

    # health check items
    factory.provide("health_check_hosted_docker", get_class("hackathon.health.health_check.HostedDockerHealthCheck"))
    factory.provide("health_check_alauda_docker", get_class("hackathon.health.health_check.AlaudaDockerHealthCheck"))
    factory.provide("health_check_guacamole", get_class("hackathon.health.health_check.GuacamoleHealthCheck"))
    factory.provide("health_check_azure", get_class("hackathon.health.health_check.AzureHealthCheck"))
    factory.provide("health_check_mongodb", get_class("hackathon.health.health_check.MongoDBHealthCheck"))

    # docker
    factory.provide("hosted_docker_proxy", get_class("hackathon.docker.hosted_docker.HostedDockerFormation"))
    factory.provide("alauda_docker_proxy", get_class("hackathon.docker.alauda_docker.AlaudaDockerFormation"))

    # storage
    init_hackathon_storage()
def cascading_info(self):
    '''
    Get all cascading info of the current ORM.

    Equivalent to db.util.get_cascading_info(orm, 'cascading_info', debug=False)['all_info'];
    think of it as syntactic sugar.

    :return: dict
    '''
    from util import get_class
    result = {}
    for x in self.hierarchy:
        cascading_info = x.infos.filter(get_class('info').name == 'cascading_info').first()
        if cascading_info:
            result.update(cascading_info.extra_data)
    return result
def push(self, overwrite=False):
    '''
    Push all loaded JSON presets to the database.

    :param overwrite: If False, only new configs are created. If True, configs that
                      already exist in the database are force-updated as well.
    :return:
    '''
    import dayu_database
    session = dayu_database.get_session()
    table_class = util.get_class(
        'storage') if self.prefix == 'storage' else util.get_class(
        self.prefix + '_config')
    for key in self.__class__.all_configs:
        try:
            old_orm = session.query(table_class).filter(
                table_class.name == key).one()
            if overwrite:
                old_orm.extra_data = self.__class__.all_configs[key]
        except:
            new_orm = table_class(
                name=key, extra_data=self.__class__.all_configs[key])
            session.add(new_orm)
    session.commit()
    session.close()
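# Hypothetical usage, assuming a loader class that keeps the parsed JSON presets in all_configs
# (class and prefix names are illustrative):
# ConfigPresetLoader(prefix='pipeline').push(overwrite=True)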
def validate_type_name(self, key, value):
    if value is None:
        return value

    import dayu_database
    from util import get_class
    session = dayu_database.get_session()
    try:
        type_table = get_class('type')
        type_orm = session.query(type_table).filter(type_table.name == value).one()
    except:
        # session.rollback()
        raise Exception('no TYPE named: {}'.format(value))
    return value
def run(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        option_string: typing.Optional[str]):
    '''
    :param parser: The argument parser in use.
    :param namespace: The namespace for parsed args.
    :param values: Values for the action.
    :param option_string: Option string.
    '''
    buildfile_path = config.get_buildfile_path(
        path=namespace.path, image_name=values)
    build_config = config.get_build_config(
        path=namespace.path, image_name=values)
    tag_build = build_config.get_tag_build()
    if tag_build is None:
        LOGGER.info('No tag_build configured')
        return

    version_from = tag_build.version_from
    if version_from:
        LOGGER.info('Using tag_build for %s', version_from.type.value)
        repository = util.get_class(
            package='version_finder',
            module=version_from.type.value,
            name=version_from.type.value)(version_from=version_from)
        # TODO: add semver
        version = repository.get_latest(
            first_versions=namespace.first_versions)
        if version is not None:
            LOGGER.info('Found version, %s', version)
            # TODO: add tag build construction VERSION+GIT etc
            build_config.image.tag = version
            build_config.image.tag_build.version = version
            if not namespace.dry_run:
                with open(file=buildfile_path, mode='w') as buildfile:
                    build_config.to_fobj(fileobj=buildfile)
def main():
    """ Method to run if executed as script. """
    options, args = parse_options()

    if options.fetch:
        # get URLs and data
        directory = os.path.abspath(options.directory)
        if os.path.exists(directory) and os.listdir(directory):
            sys.exit("The target directory must be empty.")
        print "Determine number of images to fetch..."
        urls = get_urls(args, options.pages)
        number_of_images = [(tag, len(urls[tag])) for tag in urls]
        total = reduce(lambda x,y: x+y, [t[1] for t in number_of_images])
        print "Fetching %i images (%s) into %s..." % (total,
            ', '.join(["%s: %i" % (tag, number) for tag, number in number_of_images]),
            directory)
        fetch_data(os.path.abspath(options.directory), urls, True)
        print "\nAll images fetched."
    elif options.analyze:
        # go to analyzer mode
        if not args:
            path = Repository.get_last()  # open last fetched data
        else:
            path = args[0]
        directory = os.path.abspath(path)
        if not os.path.exists(directory):
            sys.exit("The target directory must exist.")
        rep = Repository(directory)
        # load analyzer
        analyser = [get_class(m)(rep) for m in settings.ANALYZERS]
        _cmd = AnalyzerCmd(rep, analyser)
        _cmd.cmdloop("here we go...")
def items(self):
    '''
    Query the database according to the search definition.

    The search options are stored in extra_data, for example:
    {'target_table': 'folder',
     'filters': {'and': [{'col': 'name', 'op': 'like', 'value': '%0010%', 'do': True},
                         {'or': [{'col': 'top.name', 'op': 'eq', 'value': 'ss', 'do': True},
                                 {'col': 'created_by.name', 'op': 'in', 'value': 'yangzhuo,andyguo', 'do': True}]}]}}

    target_table names the table to query, and filters holds the search conditions.
    Filter conditions fall into two groups:
    * logic entries, e.g. and, or, not
    * filter entries

    A logic entry is a dict whose key is the logic type and whose value is always a list.
    A filter entry is a dict describing one search condition with three keys:
    * col: the attribute to search; chained lookups are joined with '.', e.g. "top.name"
    * op: the operation to apply, e.g. in, eq, not_in
    * value: the user input the operation compares against; multiple values are
      comma separated, e.g. "a,b,c,d"

    :return: a SQL query object; call list() on it yourself if you want the actual rows
    '''
    import dayu_database as db
    import util
    import filter_parse

    # resolve the ORM class to query
    model_class = util.get_class(self.extra_data.get('target_table', 'folder'))
    # build the most basic query object
    sql_expr = db.get_session().query(model_class)

    def _build_filter(model_class, col_name_list):
        '''
        Build the chain of query callables for a single filter condition
        (users should never call this themselves).

        :param model_class: ORM class
        :param col_name_list: the col of the filter condition, split on '.'
        :return: list
        '''
        current_table = model_class
        relationship_filters = []
        # if the col name contains '.', multiple chained (joined) lookups are required
        for col_name in col_name_list:
            # get the sqlalchemy attributes of the ORM class; only these can take part in
            # SQL generation (pure Python attributes are excluded)
            sql_attr = inspect(current_table).attrs.get(col_name, None)
            # get the sqlalchemy.orm.attributes.InstrumentedAttribute object,
            # essentially something like FOLDER.xxx
            col_attr = getattr(current_table, col_name, None)
            # a column is a plain attribute on the table, no sub-query needed
            if sql_attr.__class__ == ColumnProperty:
                relationship_filters.append(col_attr)
            # a relationship requires a sub-query
            elif sql_attr.__class__ == RelationshipProperty:
                # choose any() for one-to-many and has() for many-to-one
                if sql_attr.uselist:
                    col_attr = getattr(col_attr, 'any', None)
                else:
                    col_attr = getattr(col_attr, 'has', None)
                # the sub-query may cross tables, so further lookups use the related class
                current_table = sql_attr.mapper.class_
                relationship_filters.append(col_attr)
            else:
                raise Exception('no such a name in ORM')
        return relationship_filters

    def traverse_filter(raw_filter):
        '''
        Walk the whole search dict.

        :param raw_filter: dict, must be a logic entry, e.g. {and: [...]}
        :return: an expression usable inside .filter()
        '''
        for key, value in raw_filter.items():
            # resolve the logic function
            logic = filter_parse.LOGIC_SWITCH[key]
            # holds the conditions belonging to the current logic function
            param = []
            for sub in value:
                # a condition without col is treated as a nested logic entry,
                # so recurse into traverse_filter
                if sub.get('col', None) is None:
                    param.append(traverse_filter(sub))
                else:
                    # this is a normal search condition
                    do = sub.get('do')
                    # skip conditions the user did not check
                    if not do:
                        continue
                    col_name = sub.get('col')
                    op = sub.get('op')
                    data_type = sub.get('type')
                    # resolve keywords inside value: plain strings pass through, dicts are
                    # handled by the handler matching their key
                    exp_value = filter_parse.resolve_expression(sub.get('value'))
                    # convert the string according to the sqlalchemy data type so comparisons
                    # give correct results (e.g. DATETIME)
                    exp_value = filter_parse.resolve_type(data_type, exp_value)
                    # _build_filter returns the list of query callables, ordered by lookup order
                    attr_list = _build_filter(model_class, col_name.split('.'))
                    # trick: sqlalchemy has no not_in style operators, so split them in two,
                    # apply in first and negate the result afterwards
                    if 'not' in op:
                        op = op.replace('not', '').strip('_')
                    # for the in operator the value must be a list
                    if op == 'in':
                        attr_list[-1] = attr_list[-1].in_(exp_value.split(','))
                    # other normal operators: probe the possible method spellings one by one
                    else:
                        attr = next((x.format(op) for x in ['{}', '{}_', '__{}__']
                                     if hasattr(attr_list[-1], x.format(op))), None)
                        if attr is None:
                            raise Exception('not a legal op')
                        if exp_value == 'null':
                            exp_value = None
                        attr_list[-1] = getattr(attr_list[-1], attr)(exp_value)
                    # reduce in reverse, always list[n](list[n+1]), until done; this yields
                    # something like FOLDER.top.has(FOLDER.created.has(USER.name.in_([...])))
                    single_sql = attr_list.pop()
                    while attr_list:
                        single_sql = attr_list.pop()(single_sql)
                    # if a not was present, negate the whole expression, e.g. not_(...)
                    if 'not' in sub.get('op'):
                        single_sql = filter_parse.LOGIC_SWITCH['not'](single_sql)
                    # append to the logic list
                    param.append(single_sql)
            # return the logic function
            return logic(*param)

    # the actual call; returns a SQL query object that can be iterated with a for loop,
    # otherwise the user has to list() it
    if self.extra_data.get('filters', None):
        filter_func = traverse_filter(self.extra_data['filters'])
        return (x for x in sql_expr.filter(filter_func).filter(model_class.active == True))
    else:
        return []
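# A search configuration in the shape items() expects, taken from the docstring above
# ('search' stands for any ORM row whose extra_data carries the definition; values illustrative):
# search.extra_data = {
#     'target_table': 'folder',
#     'filters': {'and': [
#         {'col': 'name', 'op': 'like', 'value': '%0010%', 'do': True},
#         {'or': [
#             {'col': 'top.name', 'op': 'eq', 'value': 'ss', 'do': True},
#             {'col': 'created_by.name', 'op': 'in', 'value': 'yangzhuo,andyguo', 'do': True},
#         ]},
#     ]},
# }
# for folder in search.items():
#     print(folder.name)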
def main():
    '''Runs query and builds symlinks '''
    usage = ("( python -m osdcquery.osdcquery | %prog ) [options] query_name"
             "[url] query_string\n If url is missing it will use the default from"
             " the configuration module")

    parser = OptionParser(usage=usage)

    # add shared options
    shared_options(parser)

    parser.add_option("-t", "--target_dir", dest="target_dir",
        help="target directory (where the original files are located)")

    parser.add_option("-l", "--link_dir", dest="link_dir",
        help="link directory (where to put the generated symlinks)")

    parser.add_option("-i", "--dangle", dest="dangle", action="store_true",
        help="ignore nonexisting target file; create dangling link",
        default=False)

    parser.add_option("-u", "--update", action="store_true", dest="update",
        help="update files in query directory using .info file", default=False)

    (options, args) = parser.parse_args()

    logger = get_simple_logger(options.loglevel, options.verbose)

    settings = importlib.import_module(options.config)

    target_dir = options.target_dir if options.target_dir else \
        settings.target_dir

    link_dir = options.link_dir if options.link_dir else settings.link_dir
    link_dir = os.path.expanduser(link_dir)

    max_args = 3
    min_args = 2

    if options.update:
        max_args = 1
        min_args = 1

    if len(args) > max_args or len(args) < min_args:
        parser.error("incorrect number of arguments")

    query_name = args[0]

    fs_handler_class = get_class(settings.fs_handler_module_name,
        settings.fs_handler_class_name)
    fs_handler = fs_handler_class()

    new_dir = os.path.join(link_dir, query_name)

    if options.update:
        info = json.loads(fs_handler.read_manifest(new_dir))
        query_url = info[QUERY_URL]
        query_string = info[QUERY_STRING]
    else:
        if len(args) == 2:
            query_url = settings.url
            query_string = args[1]
        else:
            query_url = args[1]
            query_string = args[2]

    if fs_handler.exists(new_dir) and not options.update:
        error_message = 'Directory "%s" already exists' % new_dir
        logger.error(error_message)
        exit(1)

    query_class = get_class(settings.query_module_name,
        settings.query_class_name)
    query = query_class(query_url, settings.query_fields,
        settings.non_disease_dir)

    dirbuild_class = get_class(settings.dirbuild_module_name,
        settings.dirbuild_class_name)
    builder = dirbuild_class(target_dir, os.path.join(link_dir, query_name))

    query_results = query.run_query(query_string)
    logger.debug(query_results)

    if len(query_results) < 1:
        print "Query returned 0 results"
        links = {}
    else:
        links = builder.associate(query_results)

    if len(links) < 1:
        print "No links to be created"

    if options.update:
        logger.info("Updating directory %s" % new_dir)
    else:
        logger.info("Making directory %s" % new_dir)
        fs_handler.mkdir(new_dir)

    num_links = 0
    for link, target in links.items():
        exists = fs_handler.exists(target)
        if not exists:
            logger.warning("File %s does not exist on disk." % target)
        if exists or options.dangle:
            logger.info("Creating link %s to target %s" % (link, target))
            fs_handler.symlink(target, link)
            num_links += 1

    manifest = create_manifest(query_name, query_url, query_string,
        options.config, len(links), num_links)

    fs_handler.write_manifest(new_dir, manifest)
def init_components():
    """Init hackathon factory"""
    from hackathon.database import db_session
    from hackathon.database.db_adapters import SQLAlchemyAdapter
    from hackathon.user import UserManager, UserProfileManager
    from hackathon.hack import HackathonManager, AdminManager, TeamManager, DockerHostManager, \
        AzureCertManager, RegisterManager, HackathonTemplateManager
    from hackathon.template import TemplateLibrary
    from hackathon.remote.guacamole import GuacamoleInfo
    from hackathon.expr.expr_mgr import ExprManager
    from hackathon.cache.cache_mgr import CacheManagerExt
    from hackathon.hazure.azure_formation import AzureFormation

    # dependencies MUST be provided in advance
    factory.provide("util", Utility)
    factory.provide("log", log)
    factory.provide("db", SQLAlchemyAdapter, db_session)

    # hazure
    factory.provide("azure_formation", AzureFormation)

    # utils
    init_voice_verify()
    init_sms()
    factory.provide("email", Email)

    # cache
    factory.provide("cache", CacheManagerExt)

    # scheduler
    factory.provide("scheduler", scheduler)

    # business components
    factory.provide("user_manager", UserManager)
    factory.provide("user_profile_manager", UserProfileManager)
    factory.provide("hackathon_manager", HackathonManager)
    factory.provide("register_manager", RegisterManager)
    factory.provide("azure_cert_manager", AzureCertManager)
    factory.provide("docker_host_manager", DockerHostManager)
    factory.provide("hackathon_template_manager", HackathonTemplateManager)
    factory.provide("template_library", TemplateLibrary)
    factory.provide("expr_manager", ExprManager)
    factory.provide("admin_manager", AdminManager)
    factory.provide("team_manager", TeamManager)
    factory.provide("guacamole", GuacamoleInfo)

    # health check items
    factory.provide(
        "health_check_mysql",
        get_class("hackathon.health.health_check.MySQLHealthCheck"))
    factory.provide(
        "health_check_hosted_docker",
        get_class("hackathon.health.health_check.HostedDockerHealthCheck"))
    factory.provide(
        "health_check_alauda_docker",
        get_class("hackathon.health.health_check.AlaudaDockerHealthCheck"))
    factory.provide(
        "health_check_guacamole",
        get_class("hackathon.health.health_check.GuacamoleHealthCheck"))
    factory.provide(
        "health_check_azure",
        get_class("hackathon.health.health_check.AzureHealthCheck"))

    # docker
    factory.provide(
        "hosted_docker",
        get_class("hackathon.docker.hosted_docker.HostedDockerFormation"))
    factory.provide(
        "alauda_docker",
        get_class("hackathon.docker.alauda_docker.AlaudaDockerFormation"))

    # storage
    init_hackathon_storage()
def init_components():
    """Init hackathon factory"""
    from hackathon.database import db_session
    from hackathon.database.db_adapters import SQLAlchemyAdapter
    from hackathon.user import UserManager
    from hackathon.azureformation.azure_file_service import FileService
    from hackathon.hack import HackathonManager, AdminManager, TeamManager, DockerHostManager, AzureCertManager
    from hackathon.registration.register_mgr import RegisterManager
    from hackathon.template.template_mgr import TemplateManager
    from hackathon.remote.guacamole import GuacamoleInfo
    from hackathon.expr.expr_mgr import ExprManager
    from hackathon.cache.cache_mgr import CacheManagerExt
    from hackathon.azureformation.azure_adapter import AzureAdapter
    from hackathon.azureformation.azure_subscription_service import SubscriptionService
    from hackathon.azureformation.azure_vm_service import AzureVMService
    from hackathon.azureformation.azure_storage_account_service import StorageAccount
    from hackathon.azureformation.azure_cloud_service import CloudService

    # dependencies MUST be provided in advance
    factory.provide("util", Utility)
    factory.provide("log", log)
    print '--------factory db---------'
    factory.provide("db", SQLAlchemyAdapter, db_session)
    print '--------end factory db---------'

    # scheduler
    factory.provide("scheduler", scheduler)

    # business components
    factory.provide("user_manager", UserManager)
    factory.provide("hackathon_manager", HackathonManager)
    factory.provide("register_manager", RegisterManager)
    factory.provide("file_service", FileService)
    factory.provide("azure_cert_manager", AzureCertManager)
    factory.provide("docker_host_manager", DockerHostManager)
    factory.provide("template_manager", TemplateManager)
    factory.provide("expr_manager", ExprManager)
    factory.provide("admin_manager", AdminManager)
    factory.provide("team_manager", TeamManager)
    factory.provide("guacamole", GuacamoleInfo)
    factory.provide("cache", CacheManagerExt)

    # health check items
    factory.provide("health_check_mysql", get_class("hackathon.health.health_check.MySQLHealthCheck"))
    factory.provide("health_check_hosted_docker", get_class("hackathon.health.health_check.HostedDockerHealthCheck"))
    factory.provide("health_check_alauda_docker", get_class("hackathon.health.health_check.AlaudaDockerHealthCheck"))
    factory.provide("health_check_guacamole", get_class("hackathon.health.health_check.GuacamoleHealthCheck"))
    factory.provide("health_check_azure", get_class("hackathon.health.health_check.AzureHealthCheck"))

    # docker
    factory.provide("docker", get_class("hackathon.docker.docker_helper.DockerHelper"))
    factory.provide("hosted_docker", get_class("hackathon.docker.hosted_docker.HostedDockerFormation"))
    factory.provide("alauda_docker", get_class("hackathon.docker.alauda_docker.AlaudaDockerFormation"))

    # azure
    factory.provide("azure_adapter", AzureAdapter)
    factory.provide("azure_subscription_service", SubscriptionService)
    factory.provide("azure_vm_service", AzureVMService)
    factory.provide("azure_cloud_service", CloudService)
    factory.provide("azure_storage_account_service", StorageAccount)

    # storage
    init_hackathon_storage()
"""
__init__ for dj_simple_sms

This module also imports and creates the sender object that will be used to send an SMS
"""
import util
import senders

from django.conf import settings

# here I should get the sender from settings
_sms_sender_path = getattr(settings, 'SMS_SENDER_CLASS', None)

if _sms_sender_path is None:
    # default to senders.DjangoQueueSMSSender
    SMS_SENDER = senders.DjangoQueueSMSSender()
else:
    SMS_SENDER = util.get_class(_sms_sender_path)()

import urls
import models


def sample_sms_handler(sms):
    """ This is an example of the signature that a SMS handler should have """
    print "--------------"
    print "SMS RECEIVED:"
    print sms.to_message()
    print "--------------"
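# To swap the sender, point SMS_SENDER_CLASS at a dotted path that util.get_class can resolve,
# e.g. in settings.py (the path below is illustrative, not a class shipped with dj_simple_sms):
# SMS_SENDER_CLASS = 'myproject.sms.TwilioSMSSender'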
attr(MN_STEPS, int, mn_steps)
attr(MD_STEPS, int, md_steps)
attr(MULTI, int, multi)
attr(EQ_STEPS, int, eq_steps)
attr(SHOULD_SOLVATE, int, should_solvate)
attr(MAINSELECTION, str, mainselection)
attr(MN_ARGS, dict, mn_args)
attr(EQ_ARGS, dict, eq_args)
attr(MD_ARGS, dict, md_args)

# try to create an integrator
attr(INTEGRATOR, str, integrator)
attr(INTEGRATOR_ARGS, list, integrator_args)
try:
    integrator_type = util.get_class(conf[INTEGRATOR])
    integrator_type(None, *conf[INTEGRATOR_ARGS])
    print("successfully created integrator {}".format(integrator))
except Exception, e:
    print("error creating integrator {}".format(integrator))
    raise e

# make a top if we don't have one
if top is None:
    print("attempting to auto-generate a topology...")

    with md.topology(struct=struct, protein="protein") as top:
        # topology returns:
        # {'top': '/home/andy/tmp/Au/top/system.top',
        #  'dirname': 'top',
        #  'struct': '/home/andy/tmp/Au/top/protein.pdb'}
def main(): parser = argparse.ArgumentParser() parser.add_argument('--dataset', default='/home/ubuntu/shopee-dataset/train.csv') parser.add_argument('--instance-dataset', default='/home/ubuntu/fashion/fashion-dataset/images/') parser.add_argument('--label-split', default='/home/ubuntu/shopee-dataset/train_labels.csv') parser.add_argument('--epochs', type=int) parser.add_argument('--embedding-size', type=int) parser.add_argument('--batch-size', type=int) parser.add_argument('--seed', type=int, default=42) parser.add_argument('--warmup_k', type=int) parser.add_argument('--image-size', type=int) parser.add_argument('--freeze-batchnorm', action='store_true') parser.add_argument('--samples-per-class', type=int, default=2) parser.add_argument('--apex', action='store_true') parser.add_argument('--lr-steps', nargs='+', type=int) parser.add_argument('--mode', default='train', choices=('train', 'trainval', 'test')) parser.add_argument('--log-filename', default='example') parser.add_argument('--config', default='configs/baseline.py') parser.add_argument('--output', default='experiments/baseline') parser.add_argument('--instance-augmentation-weight', type=float, default=1.0) parser.add_argument('--instance-augmentation', action='store_true') args = parser.parse_args() if args.apex: from apex import amp config = util.load_config(args.config) util.update_args(args, config, additional_keys=('epochs',)) if args.warmup_k is None: args.warmup_k = config['warmup_k'] np.random.seed(args.seed) random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) train_labels = np.loadtxt(args.label_split, dtype=np.int64) dataset = data.DMLDataset( args.dataset, image_size=args.image_size, mixup_alpha=config['mixup_alpha'], subset_labels=train_labels) sampler = data.BalancedBatchSampler( batch_size=args.batch_size, dataset=dataset, samples_per_class=args.samples_per_class) loader = data_util.DataLoader( dataset, batch_size=args.batch_size, sampler=sampler, num_workers=8, collate_fn=dataset.collate_fn) instance_loader = None if args.instance_augmentation: instance_dataset = data.InstanceAugmentationDMLDataset( args.instance_dataset, image_size=args.image_size) instance_loader = data_util.DataLoader( instance_dataset, batch_size=args.batch_size, drop_last=True, num_workers=8, collate_fn=instance_dataset.collate_fn) val_labels = data.get_val_labels(args.dataset, set(train_labels)) val_labels = list(val_labels) val_dataset = data.DMLDataset( args.dataset, image_size=args.image_size, is_training=False, subset_labels=val_labels) val_loader = data_util.DataLoader( val_dataset, batch_size=args.batch_size, collate_fn=val_dataset.collate_fn, num_workers=4 ) print(f'# samples {len(dataset)}') backbone = util.get_class_fn(config['model'])() backbone.eval() num_learners = 1 in_sizes = [] if config['is_ensemble']: tmp = backbone(torch.rand(1, 3, args.image_size, args.image_size)) num_learners = len(tmp) for i in range(num_learners): in_sizes.append(tmp[i].squeeze().size(0)) in_size = in_sizes[0] else: in_size = backbone(torch.rand(1, 3, args.image_size, args.image_size)).squeeze().size(0) in_sizes.append(in_size) backbone.train() embeddings = [] for i in range(num_learners): emb = torch.nn.Linear(in_sizes[i], args.embedding_size) emb.cuda() embeddings.append(emb) sim_siam = None if args.instance_augmentation: sim_siam = SimSiamEmbedding() sim_siam.cuda() model = SimSiamEmbeddingPredictor(backbone, embeddings, sim_siam) model.train() else: model = EmbeddingPredictor(backbone, embeddings) model.train() def 
set_bn_eval(m): if m.__class__.__name__.find('BatchNorm') != -1: print('set bn eval...') m.eval() if args.freeze_batchnorm: model.apply(set_bn_eval) if not args.apex: model = torch.nn.DataParallel(model) if args.instance_augmentation: model = SwitchableBatchNorm.convert_switchable_batchnorm(model, 2) model = model.cuda() criterion_list = [] for i in range(num_learners): criterion = util.get_class_fn(config['criterion'])( embedding_size=args.embedding_size, num_classes=dataset.num_classes).cuda() criterion_list.append(criterion) opt_warmup = util.get_class(config['opt']['type'])([ { **{'params': list(backbone.parameters()) }, 'lr': 0 }, { **{'params': sum([list(emb.parameters()) for emb in embeddings], []) }, **config['opt']['args']['embedding'] }, { **{'params': sum([list(c.parameters()) for c in criterion_list], []) }, **config['opt']['args']['proxynca'] }, ], **config['opt']['args']['base']) opt = util.get_class(config['opt']['type'])([ { **{'params': list(backbone.parameters()) }, **config['opt']['args']['backbone'] }, { **{'params': sum([list(emb.parameters()) for emb in embeddings], []) }, **config['opt']['args']['embedding'] }, { **{'params': sum([list(c.parameters()) for c in criterion_list], []) }, **config['opt']['args']['proxynca'] }, ], **config['opt']['args']['base']) if args.apex: (model, *criterion_list), (opt, opt_warmup) = amp.initialize([model] + criterion_list, [opt, opt_warmup], opt_level='O1') model = torch.nn.DataParallel(model) scheduler = util.get_class(config['lr_scheduler']['type'])( opt, **config['lr_scheduler']['args']) if not os.path.exists('log'): os.makedirs('log') if not os.path.exists(args.output): os.makedirs(args.output) logging.basicConfig( format='%(asctime)s %(message)s', level=logging.INFO, handlers=[ logging.FileHandler(f'log/{args.log_filename}'), logging.StreamHandler() ] ) logging.info('Training parameters: {}'.format(vars(args))) logging.info('Training for {} epochs'.format(args.epochs)) tic = time.time() logging.info(f'warmup for {args.warmup_k} epochs') instance_aug_iter = None if args.instance_augmentation: instange_aug_iter = iter(instance_loader) for e in range(args.warmup_k): for batch in loader: imgs = batch['image'] text = batch['text'] labels = batch['label'] if args.instance_augmentation: SwitchableBatchNorm.switch_to(model, 0) opt_warmup.zero_grad() ms = model(imgs.cuda()) if args.instance_augmentation: ms = ms[:-2] loss = 0 for m, criterion in zip(ms, criterion_list): loss += criterion(m, labels.cuda()) if args.instance_augmentation: try: instance_batch = next(instance_aug_iter) except: instance_aug_iter = iter(instance_loader) instance_batch = next(instance_aug_iter) SwitchableBatchNorm.switch_to(model, 1) preds1 = model(instance_batch['image1']) preds2 = model(instance_batch['image2']) z1, p1 = preds1[-2:] z2, p2 = preds2[-2:] negcos_loss = (negcos(p1, z2) / 2.0 + negcos(p2, z1) / 2.0) * args.instance_augmentation_weight #d_loss1, = grad(negcos_loss, (backbone,)) #d_loss2, = grad(loss, (backbone,)) #print(d_loss1, d_loss2) loss += negcos_loss SwitchableBatchNorm.switch_to(model, 0) if args.apex: with amp.scale_loss(loss, opt_warmup) as scaled_loss: scaled_loss.backward() else: loss.backward() torch.nn.utils.clip_grad_value_(model.parameters(), 10) opt_warmup.step() logging.info(f'warm up iteration {e} finished') losses = [] scores = [] lr_steps = [] it = 0 prev_lr = 0 writer = tensorboard.SummaryWriter(args.output) best_acc = 0 for e in range(args.epochs): curr_lr = opt.param_groups[0]['lr'] print(prev_lr, curr_lr) if curr_lr != 
prev_lr: prev_lr = curr_lr lr_steps.append(e) tic_per_epoch = time.time() losses_per_epoch = [] negcos_loss_per_epoch = [] grad_norm_negcos = [] grad_norm_loss = [] for batch in loader: imgs, text, labels = batch['image'], batch['text'], batch['label'] opt.zero_grad() it += 1 if args.instance_augmentation: SwitchableBatchNorm.switch_to(model, 0) ms = model(imgs.cuda()) if args.instance_augmentation: ms = ms[:-2] loss = 0 for m, criterion in zip(ms, criterion_list): loss += criterion(m, labels.cuda()) if args.instance_augmentation: try: instance_batch = next(instance_aug_iter) except: instance_aug_iter = iter(instance_loader) instance_batch = next(instance_aug_iter) SwitchableBatchNorm.switch_to(model, 1) preds1 = model(instance_batch['image1']) preds2 = model(instance_batch['image2']) SwitchableBatchNorm.switch_to(model, 0) z1, p1 = preds1[-2:] z2, p2 = preds2[-2:] negcos_loss = (negcos(p1, z2) / 2.0 + negcos(p2, z1) / 2.0) * args.instance_augmentation_weight #d_loss1, = grad(negcos_loss, (backbone,)) #d_loss2, = grad(loss, (backbone,)) #grad_norm_negcos.append(d_loss1.detach().cpu().numpy()) #grad_norm_loss.append(d_loss2.detach().cpu().numpy()) loss += negcos_loss negcos_loss_per_epoch.append(negcos_loss.detach().cpu().numpy()) if args.apex: with amp.scale_loss(loss, opt) as scaled_loss: scaled_loss.backward() else: loss.backward() first_param = list(model.parameters())[0] #print(first_param.size(), np.linalg.norm(first_param.grad.cpu().numpy())) torch.nn.utils.clip_grad_value_(model.parameters(), 10) losses_per_epoch.append(loss.data.detach().cpu().numpy()) #print(losses_per_epoch[-1]) opt.step() toc_per_epoch = time.time() print(opt) logging.info(f'epoch: {e} in {toc_per_epoch - tic_per_epoch}') losses.append(np.mean(losses_per_epoch)) tic_val = time.time() acc = eval_utils.evaluate(model, val_loader) toc_val = time.time() if args.freeze_batchnorm: model.apply(set_bn_eval) if acc > best_acc: logging.info('found new best accuracy, saving model...') best_acc = acc torch.save({ 'epoch': e, 'state_dict': model.state_dict(), 'accuracy': best_acc, 'optimizer': opt.state_dict(), 'amp': amp.state_dict() if args.apex else None }, os.path.join(args.output, f'model_best_epoch_{e}.pth')) scores.append(acc) scheduler.step(acc) logging.info(f'Accuracy: {acc} in epoch: {e}, loss: {losses[-1]}, val_time: {toc_val - tic_val}, negcos: {np.mean(negcos_loss_per_epoch)}') #logging.info(f'grad_negcos: {np.mean(grad_norm_negcos)} grad_loss: {np.mean(grad_norm_loss)}') writer.add_scalar('loss_train', losses[-1], e) writer.add_scalar('val_accuracy', scores[-1], e) writer.add_scalar('train_time', toc_per_epoch - tic_per_epoch, e) writer.add_scalar('val_time', toc_val - tic_val, e) writer.add_scalar('learning_rate', lr_steps[-1], e) writer.flush() writer.close() # step the scheduler if accuracy does not increase. scheduler.step(acc)
#!/usr/bin/python
import sys
import yaml
import util
import os

directory = sys.argv[1]
if not os.path.exists(directory):
    os.makedirs(directory)

with open(sys.argv[2]) as f:
    params = yaml.load(f)

params['data']['directory'] = directory

engine = util.create_engine()

data_name = params['data'].pop('name')
data = util.get_class(data_name)(**params['data'])

data.read_sql()
data.write()
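# Hypothetical invocation, assuming the script above is saved as dump_data.py and the YAML file's
# 'data' section names a class that util.get_class can resolve (file names illustrative):
#   python dump_data.py /tmp/output params.yaml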
def buildEnsemble(baseline_matrix, index_matrix, output_cells, validation_matrix=None, verboseCLI=False, CVMode=False, classpath='sklearn.kernel_ridge.KernelRidge', **predictArgs): """ """ (rows, columns) = index_matrix.shape known_inds = np.where(~np.isnan(index_matrix)) ensembleFolder = 'ensemble/' resultFolder = 'results/' additionalFolder = 'additional/' ensembleMatrices, ensembleParams, ensembleFilenames = loadModels( ensembleFolder, resultFolder, verbose=verboseCLI) # Create X where rows are known cells and columns are the predictions # for that cells(each column coming from a different model) # samples x features y = index_matrix[known_inds] X = np.zeros( (len(known_inds[0]), len(ensembleMatrices) + 2), dtype='float32') Xout = np.zeros( (len(output_cells[0]), len(ensembleMatrices) + 2), dtype='float32') # Insert predictions of other models for i, mat in enumerate(ensembleMatrices): X[:, i] = ensembleMatrices[i][known_inds] Xout[:, i] = ensembleMatrices[i][output_cells] X = np.clip(X, 1, 5, X) Xout = np.clip(Xout, 1, 5, Xout) # Insert the user and item id as two additional features X[:, len(ensembleMatrices)] = known_inds[0] X[:, len(ensembleMatrices) + 1] = known_inds[1] Xout[:, len(ensembleMatrices)] = output_cells[0] Xout[:, len(ensembleMatrices) + 1] = output_cells[1] temp = None ensembleMatrices = None # We load additional features generated by other models # e.g for an entry (i.j) we add the vector u_i and v_j # as features print("Loading additional features...") additionalFilenames = os.listdir(additionalFolder) additionalFilenames.sort() tuples = [] # Count features to allocate enough memory try: counter = 0 for i, f in enumerate(additionalFilenames): with open(additionalFolder + f, 'rb') as file: temp = pickle.load(file) for tup in temp: tuples.append(tup) # The models generate two types of features in # our 'additional' folder. First, there is a factorisation # produced by many of our models here. The second is # when we want to add a single feature for the users. # e.g the assigned cluster number from K-Means # The first element in the tuple determines the type # so that it can be handled appropriately. 
if tup[0] == 'user-item-factorisation': U = tup[1] Z = tup[2] counter += U.shape[1] counter += Z.shape[1] elif tup[0] == 'user-single-feature': counter += 1 else: raise Exception("Label not recognized") except EnvironmentError as e: print(e) sys.exit(1) # Allocate memory X_old = X Xout_old = Xout X = np.zeros((X_old.shape[0], X_old.shape[1] + counter), dtype='float32') X[:, :X_old.shape[1]] = X_old[:, :] Xoffset = X_old.shape[1] X_old = None Xout = np.zeros( (Xout_old.shape[0], Xout_old.shape[1] + counter), dtype='float32') Xout[:, :Xout_old.shape[1]] = Xout_old[:, :] Xoutoffset = Xout_old.shape[1] Xout_old = None # Insert the loaded features in the new matrix for i, tup in enumerate(tuples): if tup[0] == 'user-item-factorisation': U = tup[1] Z = tup[2] X[:, Xoffset:Xoffset + U.shape[1]] = U[known_inds[0], :] Xoffset += U.shape[1] X[:, Xoffset:Xoffset + Z.shape[1]] = Z[known_inds[1], :] Xoffset += Z.shape[1] Xout[:, Xoutoffset:Xoutoffset + U.shape[1]] = U[output_cells[0], :] Xoutoffset += U.shape[1] Xout[:, Xoutoffset:Xoutoffset + Z.shape[1]] = Z[output_cells[1], :] Xoutoffset += Z.shape[1] elif tup[0] == 'user-single-feature': feature = tup[1] feature = feature.reshape(-1, 1) X[:, Xoffset:Xoffset + 1] = feature[known_inds[0], :] Xoffset += 1 Xout[:, Xoutoffset:Xoutoffset + 1] = feature[output_cells[0], :] Xoutoffset += 1 tuples = None print("Additional features loaded: {}".format(counter)) print("==================================================") # Get predictor class object from string. # It is assumed that there is a fit() and a predict() method. classObj = util.get_class(classpath) classInst = classObj(**predictArgs) print(classInst) classInst = classInst.fit(X, y) X = None y = None output = classInst.predict(Xout).reshape(-1) Xout = None result = np.zeros((rows, columns), dtype='float32') result[output_cells] = np.clip(output, 1, 5) return result
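# Hypothetical call of buildEnsemble, assuming index_matrix holds NaN for unknown cells and
# output_cells is an np.where-style index tuple of the cells to predict (class path and keyword
# arguments below are illustrative, not the project's defaults):
# predictions = buildEnsemble(baseline_matrix, index_matrix, output_cells,
#                             classpath='sklearn.linear_model.Ridge', alpha=0.1)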
args = parser.parse_args()

if args.file:
    with open(args.input) as f:
        params_orig = yaml.load(f)
else:
    params_orig = yaml.load(args.input.replace('\\n', '\n'))

params = deepcopy(params_orig)

data_name = params['data'].pop('name')
model_name = params['model'].pop('name')

print 'Loading ' + data_name
print '    with parameters ' + str(params['data'])
lead_data = util.get_class(data_name)(**params['data'])
lead_data.read()

print 'Transforming with parameter ' + str(params['transform'])
lead_data.transform(**params['transform'])

train, test = lead_data.cv

print 'Training ' + model_name
print '    with parameters ' + str(params['model'])
print '    on ' + str(train.sum()) + ' examples'
print '    with ' + str(len(lead_data.X.columns)) + ' features'
estimator = util.get_class(model_name)(**params['model'])