Example no. 1
def handle_chat(message, protocol):
    """
    :return: Whether or not the chat message was a valid command
    """
    if message == 'You are already chatting in that group.':
        return True
    elif re.match(r'From Amelorate: ssh', message):
        chat.say(message[20:])
        return True

    match = re.match(r'From .*:\s', message)
    if match:
        print(':c: ' + message[5:])
        name = message[5:].split(': ')[0]
        message = message[match.end():]
        for command in COMMANDS['regex'].keys():
            match = re.match(command + r'\s', message, re.IGNORECASE)
            if match:
                message = message[match.end():]
                message = message[:-1]
                locate('commands.' + COMMANDS['regex'][command] + '.call')(message, name, protocol, CONFIG, COMMANDS)
                return True
        chat.say('/msg ' + name + ' Sorry, that command was not recognized as valid.')
        return False
    else:
        return False
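A hedged sketch of what the COMMANDS table consumed above might look like; the regex keys and module names here are purely illustrative assumptions, not taken from the original bot.

# Hypothetical shape of COMMANDS, assuming locate('commands.<name>.call') resolves to a
# call(message, name, protocol, CONFIG, COMMANDS) function inside a commands package.
COMMANDS = {
    'regex': {
        r'!help': 'help',       # "From X: !help ..."    -> commands.help.call(...)
        r'!roll \d+': 'roll',   # "From X: !roll 20 ..." -> commands.roll.call(...)
    },
}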
Example no. 2
def _unwrap_object(obj, nested=False):
    obj_type = obj['_type']
    value = obj.get('value', None)

    if obj_type == 'none':
        return None

    if obj_type in ('bool', 'str', 'int', 'float'):
        return locate(obj_type)(value)

    if obj_type == 'decimal':
        return Decimal(value)

    if obj_type == 'datetime':
        return datetime.datetime.utcfromtimestamp(value)

    if obj_type in ('list', 'dict'):
        return locate(obj_type)(unwraps(value)) if nested else value

    if obj_type in ('set', 'frozenset', 'tuple'):
        if nested:
            value = unwraps(value)
        return locate(obj_type)(value)

    if obj_type == 'frozendict':
        if nested:
            value = unwraps(value)
        return frozendict(value)

    raise ValueError(repr(obj) + ' cannot be decoded.')
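To illustrate the envelope format handled above, here is a minimal sketch (an assumption, not part of the original module) that decodes just the scalar branches via pydoc.locate:

from pydoc import locate

def unwrap_scalar(obj):
    # Scalar subset of _unwrap_object: '_type' names a builtin, 'value' holds its payload.
    if obj['_type'] == 'none':
        return None
    return locate(obj['_type'])(obj['value'])

print(unwrap_scalar({'_type': 'int', 'value': '42'}))  # 42
print(unwrap_scalar({'_type': 'none'}))                # None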
Example no. 3
def create_constants(rdbms: str):
    """
    Factory for creating Constants objects (i.e. objects for creating constants based on column widths, and
    auto-increment columns and labels).

    :param rdbms: The target RDBMS (i.e. mysql or mssql).
    :rtype: pystratum.Constants.Constants
    """
    # Note: We load modules and classes dynamically such that on the end user's system only the required modules
    #       and other dependencies for the targeted RDBMS must be installed (and required modules and other
    #       dependencies for the other RDBMSs are not required).

    if rdbms == 'mysql':
        module = locate('pystratum.mysql.MySqlConstants')
        return module.MySqlConstants()

    if rdbms == 'mssql':
        module = locate('pystratum.mssql.MsSqlConstants')
        return module.MsSqlConstants()

    if rdbms == 'pgsql':
        module = locate('pystratum.pgsql.PgSqlConstants')
        return module.PgSqlConstants()

    raise Exception("Unknown RDBMS '%s'." % rdbms)
Example no. 4
    def get_custom_object_def(cls, element):
        obj_id = element.get("id")  # or "anonymous." + str(uuid.uuid1())
        class_ = element.get("class")
        if pydoc.locate(class_) is None:
            raise ValueError("Can't locate the class: %s" % class_)

        scope = Scope.value_of(element.get("scope", Scope.SINGLETON.value).lower())
        lazy_init = element.get("lazy-init", "true").lower()
        define = ObjectDef(obj_id, pydoc.locate(class_), scope, get_boolean_from_string(lazy_init))
        for sub_element in list(element):
            tag = remove_xmlns_from_tag(sub_element.tag)
            if tag == "constructor-arg":
                arg_name = sub_element.get("name")
                arg_value = cls.get_value_def(sub_element)
                if arg_name is None:
                    define.args.append(arg_value)
                else:
                    define.kwargs[arg_name] = arg_value
            elif tag == "property":
                prop_name = sub_element.get("name")
                prop_value = cls.get_value_def(sub_element)
                if prop_value is not None:
                    define.props[prop_name] = prop_value
            else:
                raise ValueError("Unsupported sub element with tag %s of %s" % (tag, element))
        return define
Example no. 5
def create_routine_loader(rdbms):
    """
    Factory for creating RoutineLoader objects (i.e. objects for loading stored routines into a RDBMS instance from
    (pseudo) SQL files).

    :param str rdbms: The target RDBMS (i.e. mysql or mssql).
    :rtype: pystratum.RoutineLoader.RoutineLoader
    """
    # Note: We load modules and classes dynamically such that on the end user's system only the required modules
    #       and other dependencies for the targeted RDBMS must be installed (and required modules and other
    #       dependencies for the other RDBMSs are not required).

    if rdbms == 'mysql':
        module = locate('pystratum.mysql.MySqlRoutineLoader')
        return module.MySqlRoutineLoader()

    if rdbms == 'mssql':
        module = locate('pystratum.mssql.MsSqlRoutineLoader')
        return module.MsSqlRoutineLoader()

    if rdbms == 'pgsql':
        module = locate('pystratum.pgsql.PgSqlRoutineLoader')
        return module.PgSqlRoutineLoader()

    raise Exception("Unknown RDBMS '%s'." % rdbms)
Example no. 6
def create_routine_wrapper_generator(rdbms):
    """
    Factory for creating RoutineWrapperGenerator objects (i.e. objects for generating a class with wrapper methods
    for calling stored routines in a database).

    :param str rdbms: The target RDBMS (i.e. mysql or mssql).
    :rtype: pystratum.RoutineWrapperGenerator.RoutineWrapperGenerator
    """
    # Note: We load modules and classes dynamically such that on the end user's system only the required modules
    #       and other dependencies for the targeted RDBMS must be installed (and required modules and other
    #       dependencies for the other RDBMSs are not required).

    if rdbms == 'mysql':
        module = locate('pystratum.mysql.MySqlRoutineWrapperGenerator')
        return module.MySqlRoutineWrapperGenerator()

    if rdbms == 'mssql':
        module = locate('pystratum.mssql.MsSqlRoutineWrapperGenerator')
        return module.MsSqlRoutineWrapperGenerator()

    if rdbms == 'pgsql':
        module = locate('pystratum.pgsql.PgSqlRoutineWrapperGenerator')
        return module.PgSqlRoutineWrapperGenerator()

    raise Exception("Unknown RDBMS '%s'." % rdbms)
Example no. 7
File: doc.py Project: KaSt/nereamud
def cmd_htmldoc(ch, cmd, arg):
    """Creates html documentation for all registered modules. html files will
       be saved to html/pydocs/
    """
    try:
        os.makedirs(HTML_DOC_DIR)
    except: pass
    doc = pydoc.HTMLDoc()
    for modname in suggested_reading:
        todoc = pydoc.locate(modname)
        if todoc != None:
            fname = HTML_DOC_DIR + "/" + modname + ".html"
            fl    = file(fname, "w+")
            fl.write(doc.page(modname, doc.document(todoc)))
            fl.close()

    builtin_index = doc.multicolumn([doc.modulelink(pydoc.locate(modname)) for modname in builtins], lambda x: x)
    
    # build our index page. That includes things in pymodules/ and builtins
    index_contents ="".join([doc.section("<big><strong>builtins</big></strong>",
                                         'white', '#ee77aa', builtin_index),
                             doc.index("../lib/pymodules/")])

    # go over all of our builtins and add them to the index
    index = file(HTML_DOC_DIR + "/index.html", "w+")
    index.write(doc.page("index", index_contents))
    index.close()
    
    ch.send("html documentation generated for all known modules.")
Example no. 8
def main(argv):

    if FLAGS.debug:
        # Setting to '0': all tensorflow messages are logged.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
        logging.basicConfig(level=logging.INFO)

    # Extract the merged configs/dictionaries.
    config = io_utils.parse_config(flags=FLAGS)
    if config['model_params']['decode'] and config['model_params']['reset_model']:
        print("Woops! You passed {decode: True, reset_model: True}." 
              " You can't chat with a reset bot! I'll set reset to False.")
        config['model_params']['reset_model'] = False

    # If loading from pretrained, double-check that certain values are correct.
    # (This is not something a user need worry about -- done automatically)
    if FLAGS.pretrained_dir is not None:
        assert config['model_params']['decode'] \
               and not config['model_params']['reset_model']

    # Print out any non-default parameters given by user, so as to reassure
    # them that everything is set up properly.
    io_utils.print_non_defaults(config)

    print("Setting up %s dataset." % config['dataset'])
    dataset_class = locate(config['dataset']) or getattr(data, config['dataset'])
    dataset = dataset_class(config['dataset_params'])
    print("Creating", config['model'], ". . . ")
    bot_class = locate(config['model']) or getattr(chatbot, config['model'])
    bot = bot_class(dataset, config)

    if not config['model_params']['decode']:
        start_training(dataset, bot)
    else:
        start_chatting(bot)
Example no. 9
def find_injectable_classes(search_paths, exclude_injectable_module_paths=None):
    modules = set()
    for path in search_paths:
        for root, dirs, fnames in os.walk(path):
            for fname in fnames:
                if fname.endswith('.py'):
                    module_path = os.path.relpath(os.path.join(root, fname), path)
                    module = module_path.replace('/', '.')[:-3]
                    fpath = os.path.join(root, fname)
                    has_import = False
                    has_decorator = False
                    with open(fpath) as f:
                        for line in f:
                            if 'dart.context.locator' in line:
                                has_import = True
                            if '@injectable' in line:
                                has_decorator = True
                            if has_import and has_decorator:
                                break
                    if has_import and has_decorator and not path_excluded(module, exclude_injectable_module_paths):
                        modules.add(module)

    for module in modules:
        class_metadata = readmodule(module)
        for class_name in class_metadata.keys():
            # the line below will load the class, which causes the @injectable code to run,
            # registering the class (assuming the module search was not a false positive)
            locate(module + '.' + class_name)

    classes_by_name = {cls.__name__: cls for cls in class_registry.classes}
    for class_name in sorted(classes_by_name.keys()):
        _logger.info('injectable class registered: %s' % class_name)

    return classes_by_name.values()
Example no. 10
    def __call__(self, environ, start_response):
        session_store = locate(settings.SESSION_STORE)

        if not session_store or not issubclass(session_store, SessionStore):
            raise ValueError(
                'SESSION_STORE must be a sub class of \'SessionStore\''
            )

        session_store = session_store()

        auth_collection = locate(settings.AUTH_COLLECTION)
        if not auth_collection or not issubclass(auth_collection, Collection):
            raise ValueError(
                'AUTH_COLLECTION must be a sub class of \'Collection\''
            )

        environ['session'] = session_store.new()

        session_id = environ.get('HTTP_AUTHORIZATION', '')
        if len(session_id.split('Token ')) == 2:
            session_id = session_id.split('Token ')[1]
            environ['session'] = session_store.get(session_id)
        else:
            cookies = environ.get('HTTP_COOKIE')

            if cookies:
                session_id = parse_cookie(cookies).get('session_id')

                if session_id:
                    environ['session'] = session_store.get(session_id)

        environ[auth_collection.__name__.lower()] = auth_collection.get({
            '_id': deserialize(
                environ['session'].get(auth_collection.__name__.lower(), '""')
            )
        })

        def authentication(status, headers, exc_info=None):
            headers.extend([
                (
                    'Set-Cookie', dump_cookie(
                        'session_id', environ['session'].sid, 7 * 24 * 60 * 60,
                    )
                ),
                (
                    'HTTP_AUTHORIZATION', 'Token {0}'.format(
                        environ['session'].sid
                    )
                ),
            ])

            return start_response(status, headers, exc_info)

        response = self.app(environ, authentication)

        if environ['session'].should_save:
            session_store.save(environ['session'])

        return response
Example no. 11
def locate_with_hint(class_path, prefix_hints=[]):
    module_or_class = locate(class_path)
    if module_or_class is None:
        # for hint in iscanr(lambda x, y: x + "." + y, prefix_hints):
        #     module_or_class = locate(hint + "." + class_path)
        #     if module_or_class:
        #         break
        hint = ".".join(prefix_hints)
        module_or_class = locate(hint + "." + class_path)
    return module_or_class
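A hypothetical call, assuming locate_with_hint above is importable: the bare name does not resolve, so the joined prefix hints are prepended before the second locate() attempt.

cls = locate_with_hint('JSONDecoder', prefix_hints=['json', 'decoder'])
print(cls)  # <class 'json.decoder.JSONDecoder'>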
Example no. 12
    def treat_question(self, question, survey):
        LOGGER.info("Treating, %s %s", question.pk, question.text)
        options = self.tconf.get(survey_name=self.survey.name,
                                 question_text=question.text)
        multiple_charts = options.get("multiple_charts")
        if not multiple_charts:
            multiple_charts = {"": options.get("chart")}
        question_synthesis = ""
        i = 0
        for chart_title, opts in multiple_charts.items():
            i += 1
            if chart_title:
                # "" is False, by default we do not add section or anything
                mct = options["multiple_chart_type"]
                question_synthesis += "\\%s{%s}" % (mct, chart_title)
            tex_type = opts.get("type")
            if tex_type == "raw":
                question_synthesis += Question2TexRaw(question, **opts).tex()
            elif tex_type == "sankey":
                other_question_text = opts["question"]
                other_question = Question.objects.get(text=other_question_text)
                q2tex = Question2TexSankey(question)
                question_synthesis += q2tex.tex(other_question)
            elif tex_type in ["pie", "cloud", "square", "polar"]:
                q2tex = Question2TexChart(question, latex_label=i, **opts)
                question_synthesis += q2tex.tex()
            elif locate(tex_type) is None:
                msg = "{} '{}' {}".format(
                    _("We could not render a chart because the type"),
                    tex_type,
                    _("is not a standard type nor the path to an "
                      "importable valid Question2Tex child class. "
                      "Choose between 'raw', 'sankey', 'pie', 'cloud', "
                      "'square', 'polar' or 'package.path.MyQuestion2Tex"
                      "CustomClass'")
                )
                LOGGER.error(msg)
                question_synthesis += msg
            else:
                q2tex_class = locate(tex_type)
                # The user will probably know what type they should use in their
                # custom class
                opts["type"] = None
                q2tex = q2tex_class(question, latex_label=i, **opts)
                question_synthesis += q2tex.tex()
        section_title = Question2Tex.html2latex(question.text)
        return u"""
\\clearpage{}
\\section{%s}

\\label{sec:%s}

%s

""" % (section_title, question.pk, question_synthesis)
Example no. 13
def writedoc(key,top=False):
    """Write HTML documentation to a file in the current directory."""
    if(type(key) == str and (key == "modules" or key == "/.")):
        heading = pydoc.html.heading(
            '<br><big><big><strong>&nbsp;'
            'Python: Index of Modules'
            '</strong></big></big>',
            '#ffffff', '#7799ee')
        builtins = []
        for name in sys.builtin_module_names:
            builtins.append('<a href="%s">%s</a>' % (cgi.escape(name,quote=True), cgi.escape(name)))
        indices = ['<p>Built-in modules: ' + cgi.escape(join(builtins, ', '))]
        seen = {}
        for dir in pydoc.pathdirs():
            indices.append(pydoc.html.index(dir, seen))
        print cleanlinks(heading + join(indices))
        return

    if(type(key) != types.ModuleType):
        object = pydoc.locate(key)
        if(object == None and top):
            print "could not locate module/object for key " + \
                   cgi.escape(key) + "<br><a href=\"pydoc:modules\">go to index</a>";
    else:
        object = key
            
    if object:
        print cleanlinks(pydoc.html.page(pydoc.describe(object), pydoc.html.document(object)))
Example no. 14
File: doc.py Project: KaSt/nereamud
def cmd_doc(ch, cmd, arg):
    """Return Python documentation for the specified module, class, function,
       etc... for example:
       
       > doc char.Char

       Will return all available documentation for the Char class.
    """
    if arg == "":
        ch.page("\r\n".join(display.pagedlist({ "Topics" : suggested_reading },
                                              header = "Suggested doc readings include:")))
    else:
        # just because sometimes I forget periods
        arg = arg.replace(" ", ".")

        # are we looking for a shortcut value?
        if arg in shortcuts:
            arg = shortcuts[arg]

        # try to find what we're documenting
        todoc = pydoc.locate(arg)
        if todoc == None:
            ch.send("Could not find Python documentation on: '%s'" % arg)
        else:
            doc = pydoc.TextDoc()
            ch.page(doc.document(todoc).replace("{", "{{"))
Example no. 15
  def get_summary(self, query, code, cursor_line, cursor_ch, brain):
    elems = self.find_elems_under_cursor(code, cursor_line, cursor_ch, brain)
    elems = sorted(elems, key=lambda x:(len(x.elem), x.elem), reverse=True)
    summary_groups = []
    code_lines = code.split('\n')
    for elem in elems:
      elem_id = elem.elem
      elemdoc = doc_serve.find_elem(elem_id)
      if not elemdoc: continue
      summary = {}
      summary['name'] = elemdoc.full_name.replace('matplotlib.', '')
      summary['elem_id'] = elem_id
      summary['val'] = elemdoc.name
      summary['docstring'] = elemdoc.utter_expand
      if not elemdoc.utter_expand:
        thing = pydoc.locate(elemdoc.full_name)
        if thing:
          summary['docstring'] = pydoc.html.document(thing)
      summary['signature'] = self.get_func_signature(elemdoc)  # only valid for func
      summary['val_text_range'] = elem.getValueTextRange()  # only valid for arg and argkey
      used_items = self.get_used_items_for_elem(elem, elemdoc.type, code_lines)  # valid for all
      summary['suggest'] = self.get_suggest(query, code, cursor_line, cursor_ch, brain, elem_id, used_items, elem)
      summary['type'] = elemdoc.type
      summary_groups.append(summary)

    # A made-up summary, just for suggesting functions.
    summary = {}
    summary['name'] = 'TOP_MODULE'
    summary['elem_id'] = 'TOP_MODULE'
    summary['val'] = 'TOP_MODULE'
    summary['suggest'] = self.get_suggest(query, code, cursor_line, cursor_ch, brain, 'TOP_MODULE')
    summary_groups.append(summary)

    return summary_groups
Example no. 16
    def test_builtin(self):
        for name in ('str', 'str.translate', '__builtin__.str',
                     '__builtin__.str.translate'):
            # test low-level function
            self.assertIsNotNone(pydoc.locate(name))
            # test high-level function
            try:
                pydoc.render_doc(name)
            except ImportError:
                self.fail('finding the doc of {!r} failed'.format(name))

        for name in ('not__builtin__', 'strrr', 'strr.translate',
                     'str.trrrranslate', '__builtin__.strrr',
                     '__builtin__.str.trrranslate'):
            self.assertIsNone(pydoc.locate(name))
            self.assertRaises(ImportError, pydoc.render_doc, name)
Example no. 17
def open(filename):
    '''Import netCDF output file as OpenDrift object of correct class'''

    import os
    import logging
    import pydoc
    from netCDF4 import Dataset
    if not os.path.exists(filename):
        logging.info('File does not exist, trying to retrieve from URL')
        import urllib
        try:
            urllib.urlretrieve(filename, 'opendrift_tmp.nc')
            filename = 'opendrift_tmp.nc'
        except:
            raise ValueError('%s does not exist' % filename)
    n = Dataset(filename)
    try:
        module_name = n.opendrift_module
        class_name = n.opendrift_class
    except:
        raise ValueError(filename + ' does not contain '
                         'necessary global attributes '
                         'opendrift_module and opendrift_class')
    n.close()

    cls = pydoc.locate(module_name + '.' + class_name)
    if cls is None:
        from models import oceandrift3D
        cls = oceandrift3D.OceanDrift3D
    o = cls()
    o.io_import_file(filename)
    logging.info('Returning ' + str(type(o)) + ' object')
    return o
Example no. 18
def serialize_model(obj):
    """
    Locates a model's serializer and uses it to serialize a model instance.
    This allows us to search a document through all its important components.
    If an attribute of a model is important enough to make it into the model
    serializer, the model should also be searchable through that attribute.
    This also takes care of all the child models of a model if they have been
    inlined in the serializer.

    For this to work, a model's serializer name has to follow the convention
    '<model_name>Serializer'. Failing to do so will cause the function to
    throw a TypeError exception.
    Only apps in local apps will be indexed.
    """
    app_label = obj._meta.app_label
    serializer_path = "{}{}{}{}".format(
        app_label, ".serializers.", obj.__class__.__name__, 'Serializer')
    serializer_cls = pydoc.locate(serializer_path)
    if not serializer_cls:
        LOGGER.info("Unable to locate a serializer for model {}".format(
            obj.__class__))
    else:
        serialized_data = serializer_cls(obj).data

        serialized_data = json.dumps(serialized_data, default=default)
        return {
            "data": serialized_data,
            "instance_type": obj.__class__.__name__.lower(),
            "instance_id": str(obj.id)
        }
Example no. 19
 def __init__(
     self, base_modules, destination_directory=".",
     recursion=1, exclusions=(),
     recursion_stops=(),
     formatter = None
 ):
     self.destinationDirectory = os.path.abspath(destination_directory)
     self.exclusions = {}
     self.warnings = []
     self.baseSpecifiers = {}
     self.completed = {}
     self.recursionStops = {}
     self.recursion = recursion
     for stop in recursion_stops:
         self.recursionStops[stop] = 1
     self.pending = []
     for exclusion in exclusions:
         try:
             self.exclusions[exclusion] = pydoc.locate(exclusion)
         except pydoc.ErrorDuringImport:
             self.warn('Unable to import the module {0} which was specified as an exclusion module'.format(
                 repr(exclusion))
             )
     self.formatter = formatter or DefaultFormatter()
     for base in base_modules:
         self.add_base(base)
Example no. 20
def index_instance(app_label, model_name, instance_id, index_name=INDEX_NAME):
    indexed = False
    elastic_api = ElasticAPI()
    obj_path = "{0}.models.{1}".format(app_label, model_name)
    obj = pydoc.locate(obj_path).objects.get(id=instance_id)
    if not elastic_api._is_on:
        ErrorQueue.objects.get_or_create(
            object_pk=str(obj.pk),
            app_label=obj._meta.app_label,
            model_name=obj.__class__.__name__,
            except_message="Elastic Search is not running",
            error_type="SEARCH_INDEXING_ERROR"
        )
        return indexed

    if confirm_model_is_indexable(obj.__class__):
        data = serialize_model(obj)
        if data:
            elastic_api.index_document(index_name, data)
            LOGGER.info("Indexed {0}".format(data))
            indexed = True
        else:
            LOGGER.info(
                "something unexpected occurred when indexing {} - {}"
                .format(model_name, instance_id)
            )
    else:
        LOGGER.info(
            "Instance of model {} skipped for indexing as it should not be"
            " indexed".format(obj.__class__))
    return indexed
Example no. 21
def decode_from_dict(field_typestr, value):
    if not value or not field_typestr:
        return value

    if field_typestr == 'datetime.datetime':
        return dateutil.parser.parse(value)
    if field_typestr == 'datetime.timedelta':
        match = timedelta_re.match(value).groupdict()
        for k, v in match.items():
            match[k] = int(v) if v is not None else 0
        return datetime.timedelta(**match)
    if field_typestr == 'datetime.date':
        return dateutil.parser.parse(value).date()
    if field_typestr.startswith('dict'):
        if field_typestr == 'dict':
            return value
        # ensure sensible keys
        assert field_typestr[:9] == 'dict[str,'
        dict_value_typestr = field_typestr[9:-1]
        return {k: decode_from_dict(dict_value_typestr, v) for k, v in value.iteritems()}

    if field_typestr.startswith('list'):
        list_typestr = field_typestr[5:-1]
        return [decode_from_dict(list_typestr, v) for v in value]

    cls = locate(field_typestr)
    if hasattr(cls, '__dictable_public_fields_with_defaults'):
        return from_dict(cls, value)

    return cls(value)
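A few hypothetical calls, assuming decode_from_dict above and its module-level imports (pydoc.locate, dateutil) are available, showing how the type strings drive decoding:

print(decode_from_dict('int', '7'))                     # 7
print(decode_from_dict('list[int]', ['1', '2']))        # [1, 2]
print(decode_from_dict('datetime.date', '2020-01-02'))  # datetime.date(2020, 1, 2)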
Example no. 22
 def addBase(self, specifier):
     """Set the base of the documentation set, only children of these modules will be documented"""
     try:
         self.baseSpecifiers[specifier] = pydoc.locate(specifier)
         self.pending.append(specifier)
     except pydoc.ErrorDuringImport, value:
         self.warn("""Unable to import the module %s which was specified as a base module""" % (repr(specifier)))
Example no. 23
File: h5df.py Project: gilesc/h5df
def get_decoder(index_type_str, encoding="utf-8"):
    assert isinstance(index_type_str, str)
    t = locate(index_type_str)
    if t is str:
        return lambda xs: np.char.decode(np.array(xs), encoding)
    elif t is int:
        return lambda xs: np.array(xs)
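A hypothetical round trip, assuming numpy and the get_decoder above are in scope: a str index decodes byte strings back to unicode, while an int index is passed through as a plain array.

import numpy as np

decode = get_decoder('str')            # the index type is named by its Python type name
print(decode([b'alpha', b'beta']))     # ['alpha' 'beta']  (decoded to a unicode array)
print(get_decoder('int')([1, 2, 3]))   # [1 2 3]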
Example no. 24
def execute_function(function_request):
    """
    Given a request created by
    `beanstalk_dispatch.common.create_request_body`, executes the
    request.  This function is to be run on a beanstalk worker.
    """
    dispatch_table = getattr(settings, 'BEANSTALK_DISPATCH_TABLE', None)

    if dispatch_table is None:
        raise BeanstalkDispatchError('No beanstalk dispatch table configured')
    for key in (FUNCTION, ARGS, KWARGS):
        if key not in function_request.keys():
            raise BeanstalkDispatchError(
                'Please provide a {} argument'.format(key))

    function_path = dispatch_table.get(
        function_request[FUNCTION], ''
    )

    if function_path:
        # TODO(marcua): Catch import errors and rethrow them as
        # BeanstalkDispatchErrors.
        function = locate(function_path)
        function(*function_request[ARGS], **function_request[KWARGS])
    else:
        raise BeanstalkDispatchError(
            'Requested function not found: {}'.format(
                function_request[FUNCTION]))
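The dispatch table and request body might look roughly like the sketch below. The setting name comes from the code above, but the task path, the FUNCTION/ARGS/KWARGS key spellings, and the values are assumptions made for illustration.

# settings.py (hypothetical)
BEANSTALK_DISPATCH_TABLE = {
    'send_welcome_email': 'myapp.tasks.send_welcome_email',   # illustrative dotted path
}

# A request body, assuming FUNCTION/ARGS/KWARGS map to these key names:
function_request = {
    'function': 'send_welcome_email',
    'args': [42],
    'kwargs': {'resend': False},
}
# execute_function(function_request) would then locate() the dotted path and call
# myapp.tasks.send_welcome_email(42, resend=False).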
Example no. 25
def metadata():
    json_file = '../src/pytorch-metadata.json'
    json_data = open(json_file).read()
    json_root = json.loads(json_data)

    schema_map = {}

    for entry in json_root:
        name = entry['name']
        schema = entry['schema']
        schema_map[name] = schema

    for entry in json_root:
        name = entry['name']
        schema = entry['schema']
        if 'package' in schema:
            class_name = schema['package'] + '.' + name
            # print(class_name)
            class_definition = pydoc.locate(class_name)
            if not class_definition:
                raise Exception('\'' + class_name + '\' not found.')
            docstring = class_definition.__doc__
            if not docstring:
                raise Exception('\'' + class_name + '\' missing __doc__.')
            # print(docstring)

    with io.open(json_file, 'w', newline='') as fout:
        json_data = json.dumps(json_root, sort_keys=True, indent=2)
        for line in json_data.splitlines():
            line = line.rstrip()
            if sys.version_info[0] < 3:
                line = unicode(line)
            fout.write(line)
            fout.write('\n')
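Example no. 26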
    def test_adds_authorization_header_to_response_with_cookie(self):
        environ['MONGOREST_SETTINGS_MODULE'] = 'tests.fixtures.middlewares_test_auth_settings'

        class TestResource(ListResourceMixin):

            def list(self, request):
                request.environ['session']['test'] = 'test'
                return Response()

        self.test_client = self.client(
            WSGIDispatcher(resources=[TestResource]), Response
        )

        session_store = locate(settings.SESSION_STORE)()
        session = session_store.new()
        session_store.save(session)

        response = self.test_client.get(
            '/', headers=[('Cookie', 'session_id={0}'.format(session.sid))]
        )

        self.assertIn('HTTP_AUTHORIZATION', response.headers)
        self.assertEqual(
            response.headers.get('HTTP_AUTHORIZATION'), 'Token {0}'.format(session.sid)
        )

        environ.pop('MONGOREST_SETTINGS_MODULE')
Example no. 27
def seed():
    named_fixtures = {}
    for _file in sorted(os.listdir(app.config['SEEDS_PATH'])):
        if not _file.endswith(('yaml', 'yml')):
            continue
        with open(os.path.join(app.config['SEEDS_PATH'], _file), 'r') as f:
            print 'Loading fixtures from {}'.format(_file)
            yaml_doc = yaml.load(f)
            model_class = locate(yaml_doc['model'])
            if not model_class:
                raise RuntimeError('Unable to find model {} from yaml file {}'.format(yaml_doc['model'], _file))
            for fixture in yaml_doc['fixtures']:
                # print named_fixtures
                values = {}
                if isinstance(fixture, dict):
                    # this fixture has no name
                    fixture_data = fixture
                else:
                    # this fixture has a name
                    fixture_data = yaml_doc['fixtures'][fixture]

                for value_key in fixture_data:
                    value = fixture_data[value_key]
                    if isinstance(value, basestring) and value.startswith('~'):
                        values[value_key] = named_fixtures[value[1:]]
                    else:
                        values[value_key] = value
                instance = model_class(**values).save(defer_commit=True)
                if not isinstance(fixture, dict):
                    # store the named fixture so other fixtures can reference it
                    named_fixtures[fixture] = instance

        db.session.commit()
Example no. 28
def callGenerators(imageList, config):
	### We save data in a dict with 3 parts:
	###   imageList: the list of images that are to be deployed
	###   linked-config: the configuration of the other images linked to the current image
	###   config: the current image's own config
	imageNameList = list(imageList.keys())
	imageMeta = {}
	for name in imageNameList:
		imageMeta[name] = imageList[name].getMeta()
	for image in imageList:
		####we generate the data required for this image
		data = {}
		data['imageList'] = imageMeta
		data['config'] = config[image]
		data['linked-config'] = {}
		print("connectors: " + str(imageList[image].getConnectors()))
		for connector in imageList[image].getConnectors():
			print('searching for connector ' + connector)
			if connector in imageList:
				data['linked-config'][connector] = config[connector]
		### We use reflection to load the generator pertaining to the image
		print('CALLING GENERATOR FOR ' + image)
		g = locate('assets.' + image + '.Generator')
		gen = g.Generator('assets/' + image + '/', 'output/' + image + '/')
		gen.generate(copy.deepcopy(data))
Example no. 29
def plot(func):
    try:
        import click
    except ImportError:
        click = None

    if click:
        doc_strings = [f.__doc__ for f in _plot_helper._functions]
        decorators = [click.command()]
        chain = itertools.chain(*(s.split("\n") for s in doc_strings))
        lines1, lines2 = itertools.tee(chain)
        next(lines2, None)
        for line1, line2 in itertools.izip(lines1, lines2):
            if ':' in line1:
                opt, t = [s.strip() for s in line1.split(":")]
                decorators.append(click.option('--' + opt,
                                               type=pydoc.locate(t),
                                               help=line2.strip()))
        decorators.append(wraps(func))
    else:
        decorators = [wraps(func)]

    @_decorate_all(decorators)
    def plotted_func(**kwargs):
        fig, ax = plt.subplots()
        name = func(fig, ax, **kwargs)
        for helper in _plot_helper._functions:
            helper(ax, **kwargs)
        fig.savefig(name)
    return plotted_func
Example no. 30
def retry_indexing():
    """
    Indexes the objects that were not indexed on save
    """

    objects_with_errors = ErrorQueue.objects.filter(
        error_type='SEARCH_INDEXING_ERROR')
    for obj in objects_with_errors:
        obj_path = "{}.models.{}".format(obj.app_label, obj.model_name)
        model = pydoc.locate(obj_path)

        try:
            instance = model.objects.get(id=obj.object_pk)
            result = index_instance(
                instance._meta.app_label,
                instance.__class__.__name__,
                instance.id)
            if result:
                obj.delete()
            else:
                obj.retries = obj.retries + 1
                if obj.retries > 2:
                    mail_admins(
                        subject="Update Search Index Error",
                        message="Indexing failed records is failing."
                        " Please check and ensure elasticsearch is up"
                    )
                obj.save()
        except model.DoesNotExist:
            # The related object is already deleted in the database
            LOGGER.info("The record to be indexed has been deleted")
Example no. 31
 def _import_factory_by_model(self, app_name, model_name):
     return locate(f'{app_name}.factories.{model_name}Factory')
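Example no. 32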
        # logging.basicConfig(filename='debug.log',
        #                     filemode='w',
        #                     format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
        #                     datefmt='%H:%M:%S',
        #                     level=logging.DEBUG)

        parser = argparse.ArgumentParser()
        parser.add_argument('-c',
                            '--company',
                            default='qa',
                            help='Setup the environment')
        parser.add_argument('-n',
                            '--channel',
                            default='channel',
                            help='Setup the ann/mm channel')
        parser.add_argument('-t',
                            '--think_time',
                            default=1,
                            help='Setup the think time')
        paras = parser.parse_args()

        host = locate('service.host.' + paras.company.replace('-', '_'))
        host.think_time = int(paras.think_time)
        # ---------------------------------------------------------------------
        # Main function
        # ---------------------------------------------------------------------
        get_tokens_async(host, paras)

    except Exception as e:
        logging.error(e)
Example no. 33
    def get_handler(cls, extension):

        handler = cls.HANDLER_MAP.get(extension)

        if handler:
            return locate(handler)
Example no. 34
def locate_object(namespaced_name):
    name_components = namespaced_name.split(".")
    module_name = ".".join(name_components[:-1])
    module = locate(module_name)
    class_name = name_components[-1]
    return getattr(module, class_name)
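A quick hypothetical check, assuming locate_object above is in scope: for a fully qualified class name it agrees with a direct pydoc.locate call.

from pydoc import locate

cls = locate_object('collections.OrderedDict')
assert cls is locate('collections.OrderedDict')
print(cls)  # <class 'collections.OrderedDict'>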
Example no. 35
 def get_command(self, ctx, name):
     return locate(AVAILABLE_CONVERTERS[name]).cli
Example no. 36
def get_backend(path, backend):
    """
    Path Example: bothub_backend.bothub.BothubBackend
    Backend Example: https://api.bothub.it
    """
    return locate(path)(backend=backend)
Example no. 37
 def from_string(s):
     dtype = pydoc.locate("dace.dtypes.{}".format(s))
     if dtype is None or not isinstance(dtype, dace.dtypes.typeclass):
         raise ValueError("Not a valid data type: {}".format(s))
     return dtype
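Example no. 38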
# TODO(Get rid of this or move to data.py)
args.ignore_mask = False
args.gen = False
args.newer = 2
#########################################

args.gen_epochs = 0
args.output_loss = None

if args.reproc:
    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)

# fetch data
data = locate('data.get_%s' % args.dataset)(args)

# make dataloaders
train_loader, val_loader, test_loader  = [CLDataLoader(elem, args, train=t) \
        for elem, t in zip(data, [True, False, False])]

if args.log != 'off':
    import wandb
    wandb.init(args.wandb_project)
    wandb.config.update(args)
else:
    wandb = None

# create logging containers
LOG = get_logger(['cls_loss', 'acc'], n_runs=args.n_runs, n_tasks=args.n_tasks)
Example no. 39
    os.path.abspath(__file__))
os.chdir(os.path.dirname(os.path.abspath(__file__)))
if len(sys.argv) > 1:
    configurations = fuzzConf(sys.argv[1])
else:
    configurations = fuzzConf("default.json")

desc = None
count = 0
crashes = 0
crashCheck = 0
logs = []
mutators = []

for logConf in configurations.logging:
    logClass = locate('Vanapagan.Loging.' + logConf["type"] + "." +
                      logConf["type"])
    if logClass is None:
        raise Exception("Not implemented logging type '%s'" % logConf["type"])
    log = logClass()
    log.setConf(logConf)
    logs.append(log)

for mutConf in configurations.mutators:
    mutClass = locate('Vanapagan.Mutator.' + mutConf["type"] + "." +
                      mutConf["type"])
    if mutClass is None:
        raise Exception("Not implemented mutation type '%s'" % mutConf["type"])
    mut = mutClass()
    mut.setConf(mutConf)
    mutators.append(mut)
Example no. 40
def nameToClass(name):
    """Full module + class name to class
    """
    return locate(name)
Example no. 41
    path('docs/markdown/', views.markdown_info, name='docs_markdown'),
    path('docs/api/', views.api_info, name='docs_api'),

    path('openapi', get_schema_view(
        title="Django Recipes",
        version=VERSION_NUMBER
    ), name='openapi-schema'),

    path('api/', include((router.urls, 'api'))),
    path('api-auth/', include('rest_framework.urls', namespace='rest_framework')),

]

generic_models = (Recipe, RecipeImport, Storage, RecipeBook, MealPlan, SyncLog, Sync, Comment, RecipeBookEntry, Keyword, Ingredient)

for m in generic_models:
    py_name = get_model_name(m)
    url_name = py_name.replace('_', '-')

    if c := locate(f'cookbook.views.new.{m.__name__}Create'):
        urlpatterns.append(path(f'new/{url_name}/', c.as_view(), name=f'new_{py_name}'))

    if c := locate(f'cookbook.views.edit.{m.__name__}Update'):
        urlpatterns.append(path(f'edit/{url_name}/<int:pk>/', c.as_view(), name=f'edit_{py_name}'))

    if c := getattr(lists, py_name, None):
        urlpatterns.append(path(f'list/{url_name}/', c, name=f'list_{py_name}'))

    if c := locate(f'cookbook.views.delete.{m.__name__}Delete'):
        urlpatterns.append(path(f'delete/{url_name}/<int:pk>/', c.as_view(), name=f'delete_{py_name}'))
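Example no. 42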
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    if not args.model_file:  # if the model_file is not specified
        args.model_file = './models/%s.%s' % (args.model_name, args.model_type)

    for arg in vars(args):
        print('[%s] =' % arg, getattr(args, arg))

    args.win_size = int(args.win_size / 4.0) * 4  # make sure the width of the image can be divided by 4

    # initialize model and constrained optimization problem
    model_class = locate('model_def.%s' % args.model_type)
    model = model_class.Model(model_name=args.model_name, model_file=args.model_file)
    opt_class = locate('constrained_opt_%s' % args.framework)
    opt_solver = opt_class.OPT_Solver(model, batch_size=args.batch_size, d_weight=args.d_weight)
    img_size = opt_solver.get_image_size()
    opt_engine = constrained_opt.Constrained_OPT(opt_solver, batch_size=args.batch_size, n_iters=args.n_iters, topK=args.top_k,
                                                 morph_steps=args.morph_steps, interp=args.interp)

    # initialize application
    app = QApplication(sys.argv)
    window = gui_design.GUIDesign(opt_engine, win_size=args.win_size, img_size=img_size, topK=args.top_k,
                                  model_name=args.model_name, useAverage=args.average, shadow=args.shadow)
    app.setStyleSheet(qdarkstyle.load_stylesheet(pyside=False))  # comment this if you do not like dark stylesheet
    app.setWindowIcon(QIcon('pics/logo.png'))  # load logo
    window.setWindowTitle('Interactive GAN')
    window.setWindowFlags(window.windowFlags() & ~Qt.WindowMaximizeButtonHint)   # fix window size
Example no. 43
def class_by_name(class_name: str) -> type:
    clazz = locate(class_name)
    return clazz if clazz is not None else dict  # type: ignore
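Two hypothetical calls against the helper above: a resolvable dotted path returns the class, and anything locate() cannot find falls back to dict.

assert class_by_name('pathlib.Path').__name__ == 'Path'
assert class_by_name('no.such.module.Thing') is dict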
Example no. 44
def metadata():
    def parse_docstring(docstring):
        headers = []
        lines = docstring.splitlines()
        indentation = min(
            filter(lambda s: s > 0,
                   map(lambda s: len(s) - len(s.lstrip()), lines)))
        lines = list((s[indentation:] if len(s) > len(s.lstrip()) else s)
                     for s in lines)
        docstring = '\n'.join(lines)
        tag_re = re.compile(
            '(?<=\n)(Args|Arguments|Variables|Fields|Yields|Call arguments|Raises|Examples|Example|Usage|Input shape|Output shape|Returns|References):\n',
            re.MULTILINE)
        parts = tag_re.split(docstring)
        headers.append(('', parts.pop(0)))
        while len(parts) > 0:
            headers.append((parts.pop(0), parts.pop(0)))
        return headers

    def parse_arguments(arguments):
        result = []
        item_re = re.compile(r'^   ? ?(\*?\*?\w[\w.]*?\s*):\s', re.MULTILINE)
        content = item_re.split(arguments)
        if content.pop(0) != '':
            raise Exception('')
        while len(content) > 0:
            result.append((content.pop(0), content.pop(0)))
        return result

    def convert_code_blocks(description):
        lines = description.splitlines()
        output = []
        while len(lines) > 0:
            line = lines.pop(0)
            if line.startswith('>>>') and len(lines) > 0 and (
                    lines[0].startswith('>>>') or lines[0].startswith('...')):
                output.append('```')
                output.append(line)
                while len(lines) > 0 and lines[0] != '':
                    output.append(lines.pop(0))
                output.append('```')
            else:
                output.append(line)
        return '\n'.join(output)

    def remove_indentation(value):
        lines = value.splitlines()
        indentation = min(
            map(lambda s: len(s) - len(s.lstrip()),
                filter(lambda s: len(s) > 0, lines)))
        lines = list((s[indentation:] if len(s) > 0 else s) for s in lines)
        return '\n'.join(lines).strip()

    def update_argument(schema, name, description):
        if not 'attributes' in schema:
            schema['attributes'] = []
        attribute = next((attribute for attribute in schema['attributes']
                          if attribute['name'] == name), None)
        if not attribute:
            attribute = {}
            attribute['name'] = name
            schema['attributes'].append(attribute)
        attribute['description'] = remove_indentation(description)

    def update_input(schema, description):
        if not 'inputs' in schema:
            schema['inputs'] = [{'name': 'input'}]
        parameter = next((parameter for parameter in schema['inputs'] if (
            parameter['name'] == 'input' or parameter['name'] == 'inputs')),
                         None)
        if parameter:
            parameter['description'] = remove_indentation(description)
        else:
            raise Exception('')

    def update_output(schema, description):
        if not 'outputs' in schema:
            schema['outputs'] = [{'name': 'output'}]
        parameter = next((parameter for parameter in schema['outputs']
                          if parameter['name'] == 'output'), None)
        if parameter:
            parameter['description'] = remove_indentation(description)
        else:
            raise Exception('')

    def update_examples(schema, value):
        if 'examples' in schema:
            del schema['examples']
        value = convert_code_blocks(value)
        lines = value.splitlines()
        code = []
        summary = []
        while len(lines) > 0:
            line = lines.pop(0)
            if len(line) > 0:
                if line.startswith('```'):
                    while len(lines) > 0:
                        line = lines.pop(0)
                        if line == '```':
                            break
                        code.append(line)
                else:
                    summary.append(line)
            if len(code) > 0:
                example = {}
                if len(summary):
                    example['summary'] = '\n'.join(summary)
                example['code'] = '\n'.join(code)
                if not 'examples' in schema:
                    schema['examples'] = []
                schema['examples'].append(example)
                code = []
                summary = []

    def update_references(schema, value):
        if 'references' in schema:
            del schema['references']
        references = []
        reference = ''
        lines = value.splitlines()
        for line in lines:
            if line.lstrip().startswith('- '):
                if len(reference) > 0:
                    references.append(reference)
                reference = line.lstrip().lstrip('- ')
            else:
                if line.startswith('  '):
                    line = line[2:]
                reference = ' '.join([reference, line.strip()])
        if len(reference) > 0:
            references.append(reference)
        for reference in references:
            if not 'references' in schema:
                schema['references'] = []
            schema['references'].append({'description': reference})

    json_path = os.path.join(os.path.dirname(__file__),
                             '../src/keras-metadata.json')
    json_file = open(json_path)
    json_root = json.loads(json_file.read())
    json_file.close()

    for entry in json_root:
        name = entry['name']
        schema = entry['schema']
        if 'package' in schema:
            class_name = schema['package'] + '.' + name
            class_definition = pydoc.locate(class_name)
            if not class_definition:
                raise Exception('\'' + class_name + '\' not found.')
            docstring = class_definition.__doc__
            if not docstring:
                raise Exception('\'' + class_name + '\' missing __doc__.')
            headers = parse_docstring(docstring)
            for header in headers:
                key = header[0]
                value = header[1]
                if key == '':
                    description = convert_code_blocks(value)
                    schema['description'] = remove_indentation(description)
                elif key == 'Args' or key == 'Arguments':
                    arguments = parse_arguments(value)
                    for argument in arguments:
                        update_argument(schema, argument[0], argument[1])
                elif key == 'Call arguments':
                    pass
                elif key == 'Returns':
                    pass
                elif key == 'Input shape':
                    update_input(schema, value)
                elif key == 'Output shape':
                    update_output(schema, value)
                elif key == 'Example' or key == 'Examples' or key == 'Usage':
                    update_examples(schema, value)
                elif key == 'References':
                    update_references(schema, value)
                elif key == 'Variables':
                    pass
                elif key == 'Raises':
                    pass
                else:
                    raise Exception('')

    json_file = open(json_path, 'w')
    json_data = json.dumps(json_root, sort_keys=True, indent=2)
    for line in json_data.splitlines():
        json_file.write(line.rstrip() + '\n')
    json_file.close()
Example no. 45
 def unpickle(cls, s):
     """
     :rtype: Resource
     """
     className, _json = s.split(':', 1)
     return locate(className)(*json.loads(_json))
Example no. 46
                    raise Exception("Expected ',' in parameter.")
                line = line[comma+1:]
        index = index + 1
        attribute_lines = []
        while index < len(lines) and (len(lines[index].strip(' ')) == 0 or lines[index].startswith('        ')):
            attribute_lines.append(lines[index].lstrip(' '))
            index = index + 1
        description = '\n'.join(attribute_lines)
        update_attribute(schema, name, description, attribute_type, option, default)

for entry in json_root:
    name = entry['name']
    schema = entry['schema']
    if 'package' in schema:
        class_name = schema['package'] + '.' + name
        class_definition = pydoc.locate(class_name)
        if not class_definition:
            raise Exception('\'' + class_name + '\' not found.')
        docstring = class_definition.__doc__
        if not docstring:
            raise Exception('\'' + class_name + '\' missing __doc__.')
        headers = split_docstring(docstring)
        if '' in headers:
            update_description(schema, headers[''])
        if 'Parameters' in headers:
            update_attributes(schema, headers['Parameters'])

with io.open(json_file, 'w', newline='') as fout:
    json_data = json.dumps(json_root, sort_keys=True, indent=2)
    for line in json_data.splitlines():
        fout.write(line.rstrip())
Example no. 47
def form_class_is(admin_context, form_class):
    admin_context.form_class = locate(form_class)
    assert admin_context.form_class
Example no. 48
def _get_runtime(cls_path, options: RuntimeOptions) -> Runtime:
    cls: Any = locate(cls_path)
    return cls(options)
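Example no. 49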
def _build_step(
    step: Union[str, Dict[str, Dict[str, Any]]]
) -> Union[FeatureUnion, Pipeline, BaseEstimator]:
    """
    Build an isolated step within a transformer list, given a dict config

    Parameters
    ----------
    step: dict/str - A dict, with a single key and associated dict
                     where the associated dict are parameters for the
                     given step.

                     Example: {'sklearn.preprocessing.PCA':
                                    {'n_components': 4}
                              }
                        Gives:  PCA(n_components=4)

                    Alternatively, 'step' can be a single string, in
                    which case the step will be initiated w/ default
                    params.

                    Example: 'sklearn.preprocessing.PCA'
                        Gives: PCA()
    Returns
    -------
        Scikit-Learn Transformer or BaseEstimator
    """
    logger.debug(f"Building step: {step}")

    # Here, 'step' _should_ be a dict with a single key
    # and an associated dict containing parameters for the desired
    # sklearn step. ie. {'sklearn.preprocessing.PCA': {'n_components': 2}}
    if isinstance(step, dict):

        if len(step.keys()) != 1:
            raise ValueError(f"Step should have a single key, "
                             f"found multiple: {step.keys()}")

        import_str = list(step.keys())[0]
        params = step.get(import_str, dict())

        # Load any possible classes in the params if this is a dict of maybe kwargs
        if isinstance(params, dict):
            params = _load_param_classes(params)

        # update any param values which are string locations to functions
        if isinstance(params, dict):
            for param, value in params.items():
                if isinstance(value, str):
                    possible_func = pydoc.locate(value)
                    if callable(possible_func):
                        params[param] = possible_func

        StepClass: Union[FeatureUnion, Pipeline,
                         BaseEstimator] = pydoc.locate(import_str)

        if StepClass is None:
            raise ImportError(f'Could not locate path: "{import_str}"')

        # FeatureUnion or another Pipeline transformer
        if any(StepClass == obj
               for obj in [FeatureUnion, Pipeline, Sequential]):

            # Need to ensure the parameters to be supplied are valid FeatureUnion
            # & Pipeline both take a list of transformers, but with different
            # kwarg, here we pull out the list to keep _build_scikit_branch generic
            if "transformer_list" in params:
                params["transformer_list"] = _build_scikit_branch(
                    params["transformer_list"], None)
            elif "steps" in params:
                params["steps"] = _build_scikit_branch(params["steps"], None)

            # If params is an iterable, is has to be the first argument
            # to the StepClass (FeatureUnion / Pipeline); a list of transformers
            elif any(isinstance(params, obj) for obj in (tuple, list)):
                steps = _build_scikit_branch(params, None)
                return StepClass(steps)
            elif isinstance(params, dict) and "layers" in params:
                params["layers"] = _build_branch(params["layers"], None)
            else:
                raise ValueError(f"Got {StepClass} but the supplied parameters "
                                 f"seem invalid: {params}")
        return StepClass(**params)

    # If step is just a string, can initialize it without any params
    # ie. "sklearn.preprocessing.PCA"
    elif isinstance(step, str):
        Step = pydoc.locate(
            step)  # type: Union[FeatureUnion, Pipeline, BaseEstimator]
        return Step()

    else:
        raise ValueError(f"Expected step to be either a string or a dict, "
                         f"found: {type(step)}")
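Example no. 50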
def generate_petastorm_metadata(spark,
                                dataset_url,
                                unischema_class=None,
                                use_summary_metadata=False,
                                hdfs_driver='libhdfs3'):
    """
    Generates the metadata necessary to read a petastorm dataset and adds it to an existing dataset.

    :param spark: spark session
    :param dataset_url: url of existing dataset
    :param unischema_class: (optional) fully qualified dataset unischema class. If not specified will attempt
        to find one already in the dataset. (e.g.
        :class:`examples.hello_world.generate_hello_world_dataset.HelloWorldSchema`)
    :param hdfs_driver: A string denoting the hdfs driver to use (if using a dataset on hdfs). Current choices are
        libhdfs (java through JNI) or libhdfs3 (C++)
    """
    sc = spark.sparkContext

    resolver = FilesystemResolver(dataset_url,
                                  sc._jsc.hadoopConfiguration(),
                                  hdfs_driver=hdfs_driver)
    fs = resolver.filesystem()
    dataset = pq.ParquetDataset(resolver.get_dataset_path(),
                                filesystem=fs,
                                validate_schema=False)

    if unischema_class:
        schema = locate(unischema_class)
        if not isinstance(schema, Unischema):
            raise ValueError(
                'The specified class %s is not an instance of a petastorm.Unischema object.' %
                unischema_class)
    else:
        try:
            schema = get_schema(dataset)
        except ValueError:
            raise ValueError(
                'Unischema class could not be located in existing dataset,'
                ' please specify it')

    # In order to be backwards compatible, we retrieve the common metadata from the dataset before
    # overwriting the metadata to keep row group indexes and the old row group per file index
    arrow_metadata = dataset.common_metadata or None

    with materialize_dataset(spark,
                             dataset_url,
                             schema,
                             use_summary_metadata=use_summary_metadata,
                             pyarrow_filesystem=fs):
        if use_summary_metadata:
            # Inside the materialize dataset context we just need to write the metadata file as the schema will
            # be written by the context manager.
            # We use the java ParquetOutputCommitter to write the metadata file for the existing dataset
            # which will read all the footers of the dataset in parallel and merge them.
            hadoop_config = sc._jsc.hadoopConfiguration()
            Path = sc._gateway.jvm.org.apache.hadoop.fs.Path
            parquet_output_committer = sc._gateway.jvm.org.apache.parquet.hadoop.ParquetOutputCommitter
            parquet_output_committer.writeMetaDataFile(hadoop_config,
                                                       Path(dataset_url))

    spark.stop()

    if use_summary_metadata and arrow_metadata:
        # When calling writeMetaDataFile it will overwrite the _common_metadata file which could have schema information
        # or row group indexers. Therefore we want to retain this information and will add it to the new
        # _common_metadata file. If we were using the old legacy metadata method this file won't be deleted
        base_schema = arrow_metadata.schema.to_arrow_schema()
        metadata_dict = base_schema.metadata
        if ROW_GROUPS_PER_FILE_KEY in metadata_dict:
            add_to_dataset_metadata(dataset, ROW_GROUPS_PER_FILE_KEY,
                                    metadata_dict[ROW_GROUPS_PER_FILE_KEY])
        if ROWGROUPS_INDEX_KEY in metadata_dict:
            add_to_dataset_metadata(dataset, ROWGROUPS_INDEX_KEY,
                                    metadata_dict[ROWGROUPS_INDEX_KEY])
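A minimal usage sketch, assuming a local SparkSession and an existing petastorm dataset; the dataset URL and unischema class path below are placeholders.

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[2]').appName('regenerate-metadata').getOrCreate()

# Both arguments below are placeholders for a real dataset and its schema class.
generate_petastorm_metadata(
    spark,
    dataset_url='file:///tmp/hello_world_dataset',
    unischema_class='examples.hello_world.generate_hello_world_dataset.HelloWorldSchema')

# Note: the function stops the SparkSession itself (spark.stop() above), so no further cleanup is needed here.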
Esempio n. 51
0
    def treat_question(self, question):
        LOGGER.info("Treating, %s %s", question.pk, question.text)
        options = self.tconf.get(survey_name=self.survey.name,
                                 question_text=question.text)
        multiple_charts = options.get("multiple_charts")
        if not multiple_charts:
            multiple_charts = {"": options.get("chart")}
        question_synthesis = ""
        i = 0
        for chart_title, opts in list(multiple_charts.items()):
            i += 1
            if chart_title:
                # "" is False, by default we do not add section or anything
                mct = options["multiple_chart_type"]
                question_synthesis += "\\%s{%s}" % (mct, chart_title)
            tex_type = opts.get("type")
            if tex_type == "raw":
                question_synthesis += Question2TexRaw(question, **opts).tex()
            elif tex_type == "sankey":
                if not SANKEY:
                    raise SankeyNotInstalled()
                other_question_text = opts["question"]
                other_question = Question.objects.get(text=other_question_text)
                q2tex = Question2TexSankey(question,
                                           other_question=other_question)
                question_synthesis += q2tex.tex()
            elif tex_type in ["pie", "cloud", "square", "polar"]:
                q2tex = Question2TexChart(question, latex_label=i, **opts)
                question_synthesis += q2tex.tex()
            elif locate(tex_type) is None:
                msg = "{} '{}' {}".format(
                    _("We could not render a chart because the type"),
                    tex_type,
                    _("is not a standard type nor the path to an "
                      "importable valid Question2Tex child class. "
                      "Choose between 'raw', 'sankey', 'pie', 'cloud', "
                      "'square', 'polar' or 'package.path.MyQuestion2Tex"
                      "CustomClass'"),
                )
                LOGGER.error(msg)
                question_synthesis += msg
            else:
                q2tex_class = locate(tex_type)
                # The user will probably know what type they should use in
                # their custom class
                opts["type"] = None
                q2tex = q2tex_class(question, latex_label=i, **opts)
                question_synthesis += q2tex.tex()
        section_title = Question2Tex.html2latex(question.text)
        return """
\\clearpage{}
\\section{%s}

\\label{sec:%s}

%s

""" % (
            section_title,
            question.pk,
            question_synthesis,
        )
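For reference, a sketch of what a custom chart class selected by dotted path might look like. The module name, the class name, and the import path of the Question2Tex base class are assumptions for illustration; the only contract used by the code above is the tex() method.

# my_project/charts.py  (hypothetical module)
from survey.exporter.tex.question2tex import Question2Tex  # assumed import path of the base class


class Question2TexHistogram(Question2Tex):
    """Hypothetical renderer selected with type: "my_project.charts.Question2TexHistogram"."""

    def tex(self):
        # A real implementation would build LaTeX from the answers of self.question.
        return "%% histogram placeholder for: {}\n".format(self.question.text)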
Esempio n. 52
0
def main(argv, neptune_logger=None):
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    save_model_dir = cfg.checkpoint_dir
    if neptune_logger:
        neptune_logger.create_experiment(name=save_model_dir.split('/')[-1],
                                         params=vars(cfg))
    print(save_model_dir)
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)

    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)

    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['csv_file'] = cfg.train_csv_file
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    train_iter = img_generator_class(args)
    args['batch_size'] = cfg.batch_size
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    trn_images, trn_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()
    test_imgs, test_lbls = trn_images[:50], trn_lbls[:50]

    with tf.Graph().as_default():
        if cfg.train_mode == 'semi_hard' or cfg.train_mode == 'hard' or cfg.train_mode == 'cntr':
            train_dataset = TripletTupleLoader(trn_images, trn_lbls,
                                               cfg).dataset
            #log_dataset = TripletTupleLoader(test_imgs,test_lbls,cfg).dataset
        elif cfg.train_mode == 'semi_hard_anchor' or cfg.train_mode == 'hard_anchor' or cfg.train_mode == 'cntr_anchor':
            train_dataset = TripletTupleLoaderAnchor(trn_images, trn_lbls,
                                                     cfg).dataset
        elif cfg.train_mode == 'hard_anchor_fossils':
            train_dataset = TripletTupleLoaderAnchor(trn_images, trn_lbls,
                                                     cfg).dataset
        elif cfg.train_mode == 'vanilla':
            train_dataset = QuickTupleLoader(trn_images,
                                             trn_lbls,
                                             cfg,
                                             is_training=True,
                                             shuffle=True,
                                             repeat=True).dataset
        else:
            raise NotImplementedError('{} is not a valid train mode'.format(
                cfg.train_mode))

        val_dataset = QuickTupleLoader(val_imgs,
                                       val_lbls,
                                       cfg,
                                       is_training=False,
                                       repeat=False).dataset
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()
        #batch_xs,batch_ys = training_iterator.get_next()

        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)

        # Which loss fn to impose. For example, softmax only is applied in vanilla mode,
        # while softmax + semi-hard triplet is applied in semi_hard mode.
        if cfg.train_mode == 'semi_hard' or cfg.train_mode == 'semi_hard_anchor':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, dim=-1, epsilon=1e-10)
            margin = cfg.margin
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_semi.triplet_semihard_loss(
                gt_lbls, embedding, margin)
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'hard' or cfg.train_mode == 'hard_anchor':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, dim=-1, epsilon=1e-10)
            margin = cfg.margin

            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'hard_fossils' or cfg.train_mode == 'hard_anchor_fossils':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, dim=-1, epsilon=1e-10)
            margin = cfg.margin

            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss_far = triplet_hard.batch_hard_fossils(
                gt_lbls, embedding, margin)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + 0.8 * cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss) + 0.2 * cfg.triplet_loss_lambda * tf.reduce_mean(
                    metric_loss_far)

        elif cfg.train_mode == 'cntr' or cfg.train_mode == 'cntr_anchor':

            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, dim=-1, epsilon=1e-10)
            CENTER_LOSS_LAMBDA = 0.003
            CENTER_LOSS_ALPHA = 0.5
            num_fg_classes = cfg.num_classes
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            center_loss_order, centroids, centers_update_op, appear_times, diff = center_loss.get_center_loss(
                embedding, gt_lbls, CENTER_LOSS_ALPHA, num_fg_classes)
            # sample_centroid = tf.reshape(tf.gather(centroids, gt_lbls), [-1, config.emb_dim])
            # center_loss_order = center_loss.center_loss(sample_centroid , embedding)
            logger.info('Center loss lambda {}'.format(CENTER_LOSS_LAMBDA))
            total_loss = model.train_loss + CENTER_LOSS_LAMBDA * tf.reduce_mean(
                center_loss_order)

        elif cfg.train_mode == 'vanilla':
            total_loss = model.train_loss

        logger.info('Train Mode {}'.format(cfg.train_mode))
        # variables_to_train = model.var_2_train();
        # logger.info('variables_to_train  ' + str(variables_to_train))

        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if cfg.train_mode == const.Train_Mode.CNTR:
            update_ops.append(centers_update_op)

        # print(update_ops)

        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                # grads = tf.Print(grads,[grads],'Grad Print');
                grads = optimizer.compute_gradients(total_loss, trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)

            else:
                grads = optimizer.compute_gradients(total_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)

        sess = tf.InteractiveSession()
        training_iterator = train_dataset.make_one_shot_iterator()

        validation_iterator = val_dataset.make_initializable_iterator()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        tb_path = save_model_dir
        logger.info(tb_path)
        start_iter = tb_utils.get_latest_iteration(tb_path)

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = tf.train.latest_checkpoint(save_model_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        load_model_msg = model.load_model(save_model_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          load_logits=False)
        logger.info(load_model_msg)

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)

        train_loss = tf.summary.scalar('Train_loss', model.train_loss)
        train_accuracy = tf.summary.scalar('Train_Acc', model.train_accuracy)
        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)

        best_model_step = 0
        best_acc = 0
        logger.info('Start Training from {}, till {}'.format(
            start_iter, cfg.train_iters))

        # Start Training
        for step in range(start_iter + 1, cfg.train_iters + 1):

            start_time_train = time.time()

            # Update network weights while supporting caffe_iter_size
            for mini_batch in range(cfg.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                model_loss_value, accuracy_value, _ = sess.run(
                    [model.train_loss, model.train_accuracy, accum_ops],
                    feed_dict)

            feed_dict = {handle: training_handle}

            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)

            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)

            train_time = time.time() - start_time_train
            #training loss
            loss_summary = tf.Summary(value=[
                tf.Summary.Value(tag="Train_loss",
                                 simple_value=model_loss_value)
            ])
            acc_summary = tf.Summary(value=[
                tf.Summary.Value(tag="Train_Acc", simple_value=accuracy_value)
            ])
            train_writer.add_summary(loss_summary, step)
            train_writer.add_summary(acc_summary, step)
            if neptune_logger:
                neptune_logger.log_metric('Train_loss', model_loss_value)
            if cfg.training_mode_debug:
                logger.info(
                    'Training mode debug is ON, will save images every iteration.'
                )
                batch_xs, batch_ys = training_iterator.get_next()
                summary_op = tf.summary.image('image-batch',
                                              batch_xs,
                                              max_outputs=10)
                summary = sess.run(summary_op)
                train_writer.add_summary(summary)

            if (step == 1 or step % cfg.logging_threshold == 0):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                    format(step, model_loss_value, accuracy_value, train_time))

                if (step % cfg.test_interval == 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    _val_acc_op = 0

                    gts = []
                    preds = []
                    pred_3 = []
                    pred_5 = []
                    while True:
                        try:
                            # Eval network on validation/testing split
                            feed_dict = {handle: validation_handle}
                            gt, preds_raw, predictions, val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat, macro_acc = sess.run(
                                [
                                    model.val_gt, model.val_preds,
                                    model.val_class_prediction, val_loss,
                                    model.val_accuracy, model_acc_op,
                                    val_acc_op, model.val_accumulated_accuracy,
                                    model.val_confusion_mat,
                                    model.val_per_class_acc_acc
                                ], feed_dict)
                            gts += list(gt)
                            preds += list(predictions)
                            for g, p in zip(gt, preds_raw):
                                preds_sort_3 = np.argsort(p)[-3:]
                                preds_sort_5 = np.argsort(p)[-5:]
                                if g in preds_sort_3:
                                    pred_3 += [g]
                                else:
                                    pred_3 += [preds_sort_3[-1]]

                                if g in preds_sort_5:
                                    pred_5 += [g]
                                else:
                                    pred_5 += [preds_sort_5[-1]]

                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}, Macro Acc: {1}'.format(
                                _val_acc, macro_acc))
                            if neptune_logger:
                                neptune_logger.log_metric(
                                    'Validation Accuracy Macro', macro_acc)
                            logger.info('____ Classification Report Top 1 ____')
                            report = classification_report(gts,
                                                           preds,
                                                           output_dict=True)
                            if neptune_logger:
                                neptune_logger.log_metric(
                                    'Top 1 f-1',
                                    report['weighted avg']['f1-score'])
                                neptune_logger.log_metric(
                                    'Top 1 precision',
                                    report['weighted avg']['precision'])
                                neptune_logger.log_metric(
                                    'Top 1 recall',
                                    report['weighted avg']['recall'])
                            csv_pd = classification_report_csv(report)
                            csv_pd.to_csv(
                                os.path.join(
                                    save_model_dir,
                                    'Classification_Report_top1%04d.csv' %
                                    step))
                            logger.info(report)
                            logger.info('____ Classification Report Top 3 ____')
                            report = classification_report(gts,
                                                           pred_3,
                                                           output_dict=True)
                            if neptune_logger:
                                neptune_logger.log_metric(
                                    'Top 3 f-1',
                                    report['weighted avg']['f1-score'])
                                neptune_logger.log_metric(
                                    'Top 3 precision',
                                    report['weighted avg']['precision'])
                                neptune_logger.log_metric(
                                    'Top 3 recall',
                                    report['weighted avg']['recall'])
                            csv_pd = classification_report_csv(report)
                            csv_pd.to_csv(
                                os.path.join(
                                    save_model_dir,
                                    'Classification_Report_top3%04d.csv' %
                                    step))
                            logger.info(report)
                            logger.info('____ Classification Report Top 5 ____')
                            report = classification_report(gts,
                                                           pred_5,
                                                           output_dict=True)
                            if neptune_logger:
                                neptune_logger.log_metric(
                                    'Top 5 f-1',
                                    report['weighted avg']['f1-score'])
                                neptune_logger.log_metric(
                                    'Top 5 precision',
                                    report['weighted avg']['precision'])
                                neptune_logger.log_metric(
                                    'Top 5 recall',
                                    report['weighted avg']['recall'])
                            csv_pd = classification_report_csv(report)
                            csv_pd.to_csv(
                                os.path.join(
                                    save_model_dir,
                                    'Classification_Report_top5%04d.csv' %
                                    step))
                            logger.info(report)
                            break
                    #with train_writer.as_default():

                    batch_xs, batch_ys = training_iterator.get_next()
                    summary_op = tf.summary.image('image-batch',
                                                  batch_xs,
                                                  max_outputs=10)
                    summary = sess.run(summary_op)
                    train_writer.add_summary(summary)
                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % step)
                    train_writer.add_summary(val_loss_op, step)
                    train_writer.add_summary(_val_acc_op, step)
                    train_writer.add_summary(accuracy_op, step)
                    train_writer.flush()

                    if (step % 100 == 0):
                        #log_iterator = log_dataset.make_initializable_iterator()

                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = step

                        logger.info('Best Acc {0} at {1} == {2}'.format(
                            best_acc, best_model_step, model_basename))

        logger.info('Triplet loss lambda {}'.format(cfg.triplet_loss_lambda))
        logger.info('Mode {}'.format(cfg.train_mode))
        logger.info('Loop complete')
        sess.close()
def main():

    img_generator_class = locate(config.db_tuple_loader)
    args = dict()
    args['csv_file'] = config.train_csv_file
    train_iter = img_generator_class(args)
    args['csv_file'] = config.test_csv_file
    val_iter = img_generator_class(args)

    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    save_model_dir = config.model_save_path

    log_file = os.path.join(save_model_dir, "train")
    logging.config.dictConfig(log_utils.get_logging_dict(log_file))
    log = logging.getLogger('train')
    log.info('Data Loading complete')
    with tf.Graph().as_default():

        train_dataset = TensorflowTupleLoader(train_imgs, train_lbls, is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs, val_lbls, is_training=False, batch_size=config.batch_size,
                                       repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])

        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()

        network_class = locate(config.network_name)
        model = network_class(num_classes=config.num_classes, is_training=True, images_ph=images_ph, lbls_ph=lbls_ph)


        trainable_vars = tf.trainable_variables()
        if config.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in trainable_vars]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if config.caffe_iter_size > 1:  ## Accumulated Gradient

                grads = optimizer.compute_gradients(model.train_loss, trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads)]
                iter_size = config.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients([(accum_vars[i] / iter_size, gv[1]) for i, gv in enumerate(grads)],
                                                     global_step=global_step)

            else:
                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads, global_step=global_step)

        # logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     mprint('trainable_variables:  {0} \t {1}'.format(str(v.name),str(v.shape)))


        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())


        now = datetime.now()
        if config.tensorbaord_file is None:
            tb_path = config.tensorbaord_dir + now.strftime("%Y%m%d-%H%M%S")
        else:
            tb_path = config.tensorbaord_dir + config.tensorbaord_file

        start_iter = 1 # No Resume in this code version

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)

        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = os.path.join(save_model_dir, config.model_save_name)
        print('Model Path ', ckpt_file)



        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess, saver, is_finetuning=True)
        mprint(load_model_msg,log)


        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy', model.val_accumulated_accuracy)

        mprint('Start Training ***********',log)
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, config.max_iter+1):
            start_time_train = time.time()

            feed_dict = {handle: training_handle}
            for mini_batch in range(config.caffe_iter_size - 1):
                #feed_dict = {handle: training_handle}
                sess.run(accum_ops, feed_dict)

            model_loss_value, accuracy_value, _ = sess.run([model.train_loss, model.train_accuracy, train_op],
                                                           feed_dict)

            if config.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)

            train_time = time.time() - start_time_train


            if (current_iter % config.logging_threshold == 0 or current_iter ==1):
                mprint(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.format(current_iter, model_loss_value, accuracy_value,
                                                                                train_time),log)

                if (current_iter % config.test_iteration == 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    while True:
                        try:
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat = sess.run(
                                [val_loss, model.val_accuracy, model_acc_op, val_acc_op, model.val_accumulated_accuracy,
                                 model.val_confusion_mat], feed_dict)
                        except tf.errors.OutOfRangeError:
                            mprint('Val Acc {0}'.format(_val_acc),log)
                            break



                    train_writer.add_run_metadata(run_metadata, 'step%03d' % current_iter)
                    train_writer.add_summary(val_loss_op, current_iter)
                    train_writer.add_summary(_val_acc_op, current_iter)
                    train_writer.add_summary(accuracy_op, current_iter)

                    train_writer.flush()


                    if (current_iter % config.logging_threshold == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = current_iter
                        ## Early-stopping style: keep the best checkpoint so far.
                        mprint('Best Acc {0} at {1} == {2}'.format(best_acc, best_model_step, config.model_filename),log)

        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        ckpt = os.path.join(save_model_dir, str(current_iter), config.model_save_name)
        saver.save(sess, ckpt)
        sess.close()
Esempio n. 54
0
def raise_exception(result):
    exc = locate(result["exception"])
    if exc:
        raise exc(*result.get("args", []), **result.get("kwargs", {}))
    else:
        raise TypeError("Couldn't resolve exception {}", result["exception"])
def _load_param_classes(params: dict):
    """
    Inspect the params' values and determine if any can be loaded as a class.
    If so, update that param's value to an instance of the class.

    Additionally, if a top-level value is a dict with exactly one key AND that
    key can be loaded, it is assumed to be a class path whose associated values
    should be passed in as kwargs.

    Parameters
    ----------
    params: dict
        key value pairs of kwargs, which can have full class paths defined.

    Examples
    --------
    >>> params = {"key1": "value1"}
    >>> assert _load_param_classes(params) == params  # No modifications

    # Load an actual model, without any kwargs
    >>> from sklearn.ensemble import RandomForestRegressor
    >>> params = {"base_estimator": "sklearn.ensemble.forest.RandomForestRegressor"}
    >>> print(_load_param_classes(params))
    {'base_estimator': RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                          max_features='auto', max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators='warn',
                          n_jobs=None, oob_score=False, random_state=None,
                          verbose=0, warm_start=False)}

    # Load an actual model, with kwargs
    >>> params = {"base_estimator": {"sklearn.ensemble.forest.RandomForestRegressor": {"n_estimators": 20}}}
    >>> print(_load_param_classes(params))
    {'base_estimator': RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                          max_features='auto', max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=20,
                          n_jobs=None, oob_score=False, random_state=None,
                          verbose=0, warm_start=False)}


    Returns
    -------
    dict
        Updated params which has any possible class paths loaded up as instantiated
        objects
    """
    params = copy.copy(params)
    for key, value in params.items():

        # If value is a simple string, try to load the model/class
        if isinstance(value, str):
            Model: Union[None, BaseEstimator, Pipeline] = pydoc.locate(value)
            if (Model is not None and isinstance(Model, type)
                    and issubclass(Model, BaseEstimator)):

                params[key] = Model()

        # For the next bit to work, the dict must have a single key (presumably
        # the class path), and its value must be a dict of kwargs
        elif (isinstance(value, dict) and len(value.keys()) == 1
              and isinstance(value[list(value.keys())[0]], dict)):
            Model = pydoc.locate(list(value.keys())[0])
            if Model is not None and isinstance(Model, type):

                if issubclass(Model, Pipeline) or issubclass(
                        Model, Sequential):
                    # Model is a Pipeline, so 'value' is the definition of that
                    # Pipeline; we can just re-use the entry to build a pipeline.
                    params[key] = pipeline_from_definition(value)
                else:
                    # Call this func again, in case there are nested occurrences of this problem in these kwargs
                    sub_params = value[list(value.keys())[0]]
                    kwargs = _load_param_classes(sub_params)
                    params[key] = Model(**kwargs)  # type: ignore
    return params
Esempio n. 56
0
    def _transition_queued_actions_to_pending(self):
        _logger.info('transitioning queued actions to pending')
        action_service = self._action_service
        engine_service = self._engine_service
        datastore_service = self._datastore_service
        assert isinstance(action_service, ActionService)
        assert isinstance(engine_service, EngineService)
        assert isinstance(datastore_service, DatastoreService)
        queued_actions = action_service.find_actions(
            states=[ActionState.QUEUED])
        for action in queued_actions:
            try:
                datastore = datastore_service.get_datastore(
                    action.data.datastore_id, raise_when_missing=False)
                if not datastore or datastore.data.state != DatastoreState.ACTIVE:
                    continue

                states = [
                    ActionState.PENDING, ActionState.RUNNING,
                    ActionState.FINISHING
                ]
                action_count = self._action_service.find_action_count(
                    datastore.id, states)
                if action_count >= datastore.data.concurrency:
                    _logger.info(
                        'datastore (id=%s) has reached max concurrency' %
                        datastore.id)
                    continue

                # conditionally updating queued actions as pending allows multiple concurrent engine workers if needed
                action_service.update_action_state(
                    action=action,
                    state=ActionState.PENDING,
                    error_message=action.data.error_message,
                    conditional=lambda a: a.data.state == ActionState.QUEUED)

                engine = engine_service.get_engine_by_name(
                    action.data.engine_name)

                # our best granularity of a user_id to identify who is running this workflow's action.
                datastore_user_id = datastore.data.user_id if hasattr(
                    datastore.data, 'user_id') else 'anonymous'
                if False:  #self.dart_config['dart'].get('use_local_engines'):
                    config = self.dart_config['engines'][engine.data.name]
                    engine_instance = locate(
                        config['path'])(**config.get('options', {}))
                    self._launch_in_memory_engine(engine, engine_instance,
                                                  action, datastore_user_id)
                    # empty string allows differentiation from null, yet is still falsey
                    action_service.update_action_ecs_task_arn(action, '')

                elif engine.data.ecs_task_definition_arn:
                    ecs_task_arn = self._try_run_task(engine, action,
                                                      datastore_user_id)
                    if ecs_task_arn:
                        action_service.update_action_ecs_task_arn(
                            action, ecs_task_arn)
                    else:
                        # no task arn means there isn't enough capacity at the moment, so try again later
                        action_service.update_action_state(
                            action, ActionState.QUEUED,
                            action.data.error_message)

                else:
                    msg = 'engine %s has no ecs_task_definition and local engines are not allowed'
                    raise Exception(msg % engine.data.name)

            except DartConditionalUpdateFailedException:
                # another engine worker picked it up
                continue

            except Exception as e:
                error_message = e.message + '\n\n\n' + traceback.format_exc()
                _logger.error(
                    'error transitioning action (id=%s) to PENDING: %s' %
                    (action.id, error_message))

            finally:
                db.session.rollback()
Esempio n. 57
0
def get_analyzer(analyzer_name):
    args = 'src.analyzers.{analyzer_name}.{analyzer_class}'.format(
        analyzer_name=analyzer_name, analyzer_class=analyzer_name)
    return pydoc.locate(args)()
Esempio n. 58
0
    def get_instance(cls, protocol, **kwargs):
        if protocol in cls.handlers:
            handler_class = locate(cls.handlers[protocol])
            return handler_class(**kwargs)  # Call the handler class with all passed arguments
        raise NotImplementedError(f"{cls.__name__} does not know a Scanner class for the {protocol} protocol.")
Esempio n. 59
0
        'migrate',
        interactive=interactive,
        verbosity=verbosity,
    )


@click.group()
@click.pass_context
def cli(ctx):
    """ESSArch is an open source archival solution
    compliant to the OAIS ISO-standard
    """


list(
    map(lambda cmd: cli.add_command(locate(cmd)), (
        'ESSArch_Core.cli.commands.convert.convert',
        'ESSArch_Core.cli.commands.transform.transform',
        'ESSArch_Core.cli.commands.validate.validate',
    )))


@cli.group()
def search():
    """Manage search indices
    """
    pass


list(
    map(lambda cmd: search.add_command(locate(cmd)), (
Esempio n. 60
0
def main(_argv):
  """Program entry point.
  """

  # Load flags from config file
  if FLAGS.config_path:
    with gfile.GFile(FLAGS.config_path) as config_file:
      config_flags = yaml.load(config_file)
      for flag_key, flag_value in config_flags.items():
        setattr(FLAGS, flag_key, flag_value)

  if isinstance(FLAGS.tasks, string_types):
    FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

  if isinstance(FLAGS.input_pipeline, string_types):
    FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

  input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
      FLAGS.input_pipeline, mode=tf.contrib.learn.ModeKeys.INFER,
      shuffle=False, num_epochs=1)

  # Load saved training options
  train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

  # Create the model
  model_cls = locate(train_options.model_class) or \
    getattr(models, train_options.model_class)
  model_params = train_options.model_params
  model_params = _deep_merge_dict(
      model_params, _maybe_load_yaml(FLAGS.model_params))
  model = model_cls(
      params=model_params,
      mode=tf.contrib.learn.ModeKeys.INFER)

  # Load inference tasks
  hooks = []
  for tdict in FLAGS.tasks:
    if not "params" in tdict:
      tdict["params"] = {}
    task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
    task = task_cls(tdict["params"])
    hooks.append(task)

  # Create the graph used for inference
  predictions, _, _ = create_inference_graph(
      model=model,
      input_pipeline=input_pipeline_infer,
      batch_size=FLAGS.batch_size)

  saver = tf.train.Saver()
  checkpoint_path = FLAGS.checkpoint_path
  if not checkpoint_path:
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

  def session_init_op(_scaffold, sess):
    saver.restore(sess, checkpoint_path)
    tf.logging.info("Restored model from %s", checkpoint_path)

  scaffold = tf.train.Scaffold(init_fn=session_init_op)
  session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
  with tf.train.MonitoredSession(
      session_creator=session_creator,
      hooks=hooks) as sess:

    # Run until the inputs are exhausted
    while not sess.should_stop():
      sess.run([])