Example #1
    def _parse_reaction(self, reaction):
        """Able to parse entries from getListOfReactions"""
        identifier = self._strip_reaction_id(reaction.getId())
        name = reaction.getName()
        # parse additional reaction parameters
        try:
            parameters = reaction.getKineticLaw().getListOfParameters()
            params = dict((param.getId().lower(), param.getValue())\
                    for param in parameters)
        except AttributeError:
            params = dict()
            LOGGER.debug("reaction '%s' has no kinetic parameters",
                    reaction.getId())
        # substrates' stoichiometry
        substrates = dict((self.compound_ids[elem.getSpecies()],
                abs(elem.getStoichiometry()))\
                for elem in reaction.getListOfReactants())
        # products' stoichiometry
        products = dict((self.compound_ids[elem.getSpecies()],
                abs(elem.getStoichiometry()))\
                for elem in reaction.getListOfProducts())
        # other information contained in notes
        info = self._parse_notes(reaction)
        # reaction properties and distinguishing suffixes for various
        # compartments cannot be separated easily but suffixes are necessary
#        mobj = self.reaction_property.search(identifier)
#        if mobj:
#            info["properties"] = mobj.group(1)
#            identifier = identifier[:mobj.start(1)]
        self.reaction_ids[reaction.getId()] = pyel.SBMLReaction(
                unique_id=identifier,
                substrates=substrates,
                products=products,
                reversible=reaction.getReversible(),
                name=name, notes=info, **params)
Example #2
import numpy as np
from six import iteritems  # assumed: the source module uses six for py2/py3 dict iteration


def stoichiometry_matrix(metabolites, reactions):
    """
    Return the stoichiometry matrix representation of a set of reactions.

    The order of the reactions and metabolites is respected. The metabolites
    are expected to cover every species that occurs in the given reactions.

    Parameters
    ----------
    reactions : iterable
        A somehow ordered list of unique reactions.
    metabolites : iterable
        A somehow ordered list of unique metabolites.

    Returns
    -------
    numpy.ndarray
        The 2D array that represents the stoichiometry matrix.
    dict
        A dictionary mapping metabolites to row indexes.
    dict
        A dictionary mapping reactions to column indexes.

    """
    matrix = np.zeros((len(metabolites), len(reactions)))
    met_index = dict((met, i) for i, met in enumerate(metabolites))
    rxn_index = dict()
    for i, rxn in enumerate(reactions):
        rxn_index[rxn] = i
        for met, coef in iteritems(rxn.metabolites):
            j = met_index[met]
            matrix[j, i] = coef
    return matrix, met_index, rxn_index
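A minimal usage sketch (the Reaction namedtuple and the metabolite strings below are stand-ins for whatever model objects the caller provides; they are not part of the original module):

    from collections import namedtuple

    Reaction = namedtuple("Reaction", ["id", "metabolites"])

    metabolites = ["A", "B", "C"]
    reactions = [
        Reaction("R1", {"A": -1.0, "B": 1.0}),
        Reaction("R2", {"B": -1.0, "C": 1.0}),
    ]
    matrix, met_index, rxn_index = stoichiometry_matrix(metabolites, reactions)
    # matrix.shape == (3, 2); met_index == {"A": 0, "B": 1, "C": 2}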
Example #3
 def __eq__(self, other):
     if isinstance(other, collections.abc.Mapping):  # Mapping lives in collections.abc on Python 3
         other = CaseInsensitiveDict(other)
     else:
         return NotImplemented
     # Compare insensitively
     return dict(self.lower_items()) == dict(other.lower_items())
Example #4
def fromAtoB(x1, y1, x2, y2, color='k', connectionstyle="arc3,rad=-0.4",
             shrinkA=10, shrinkB=10, arrowstyle="fancy", ax=None):
    """
    Draws an arrow from point A=(x1,y1) to point B=(x2,y2) on the (optional)
    axis ``ax``.

    .. note::

        See matplotlib documentation.

    """
    if ax is None:
        return pl.annotate("",
                           xy=(x2, y2), xycoords='data',
                           xytext=(x1, y1), textcoords='data',
                           arrowprops=dict(
                               arrowstyle=arrowstyle,  # linestyle="dashed",
                               color=color,
                               shrinkA=shrinkA, shrinkB=shrinkB,
                               patchA=None,
                               patchB=None,
                               connectionstyle=connectionstyle),
                           )
    else:
        return ax.annotate("",
                           xy=(x2, y2), xycoords='data',
                           xytext=(x1, y1), textcoords='data',
                           arrowprops=dict(
                               arrowstyle=arrowstyle,  # linestyle="dashed",
                               color=color,
                               shrinkA=shrinkA, shrinkB=shrinkB,
                               patchA=None,
                               patchB=None,
                               connectionstyle=connectionstyle),
                           )
Example #5
 def __init__(self, global_context, raw_msg):
     """Parse the message, extracts and decode all headers and all
     text parts.
     """
     super(Message, self).__init__(global_context)
     self.raw_msg = self.translate_line_breaks(raw_msg)
     self.msg = email.message_from_string(self.raw_msg)
     self.headers = _Headers()
     self.raw_headers = _Headers()
     self.addr_headers = _Headers()
     self.name_headers = _Headers()
     self.mime_headers = _Headers()
     self.received_headers = list()
     self.raw_mime_headers = _Headers()
     self.header_ips = _Headers()
     self.text = ""
     self.raw_text = ""
     self.uri_list = set()
     self.score = 0
     self.rules_checked = dict()
     self.interpolate_data = dict()
     self.plugin_tags = dict()
     # Data
     self.sender_address = ""
     self.hostname_with_ip = list()
     self.internal_relays = []
     self.external_relays = []
     self.last_internal_relay_index = 0
     self.last_trusted_relay_index = 0
     self.trusted_relays = []
     self.untrusted_relays = []
     self._parse_message()
     self._hook_parsed_metadata()
Example #6
    def summarize_annotation(self, annotation, doc):
        # Strip punctuation for word counts
        wc = len([w for w in doc if re.match(r'\w+', w)])

        sixltr = sum(len(token) > 6 for token in doc)
        numerals = sum(token.isdigit() for token in doc)
        punct_counts = self._count_punctuation(doc)

        ctr = Counter(list(self._flatten_list_of_sets(annotation)))

        # convert counts to percentile dict
        summary = {k: float(v)/float(wc) for (k,v) in dict(ctr).items()}

        # Set keys that did not occur to 0
        not_counted = { k: 0.0 for k in
                self._parser_keys() - set(summary.keys()) }

        # add non-percentile measures
        summary['wc'] = wc
        summary['analytic'] = self.analytic_thinking_score(summary)
        summary['tone'] = self.emotional_tone_score(summary)
        summary['authentic'] = self.authenticity_score(summary)
        summary['sixltr'] = sixltr
        summary['numerals'] = numerals
        summary['allpct'] = sum(punct_counts.values())

        # Merge the two dictionaries
        return dict(ChainMap(summary, not_counted, punct_counts))
Example #7
def tutalk_login(request, noise=""):
    if logged_in_and_authenticated(request):
        if utils.password_required_to_login(request):
            return redirect('tutalk-users:password-login')
        return redirect("tutalk-users:profile")
    auth_with = None
    ctx = mk_context(breadcrumbs=[dict(label="Login Page")],
                     title="Login Page",
                     login_failed=False,
                     logged_out='logout' in noise)
    if "POST" == request.method:
        utils.LOGGER.debug("%s",
                           json.dumps(dict(request.POST.items()), indent=4))
    if OneAllAuthBackend.KEY in request.POST:
        auth_with = dict(request.POST.items())
    if auth_with:
        user = authenticate(**auth_with)
        if user:
            login(request, user)
            if utils.password_required_to_login(request):
                return redirect('tutalk-users:password-login')
            return redirect("tutalk-users:profile")
        else:
            ctx['login_failed'] = True
    return render(request, LOGIN_TMPL, context=ctx)
Example #8
def jsonify(obj, pretty=False):
    """
    Turn a nested object into a (compressed) JSON string.

    Parameters
    ----------
    obj : dict
        Any kind of dictionary structure.
    pretty : bool, optional
        Whether to format the resulting JSON in a more legible way
        (default: False).

    """
    if pretty:
        params = dict(sort_keys=True, indent=2, allow_nan=False,
                      separators=(",", ": "), ensure_ascii=False)
    else:
        params = dict(sort_keys=False, indent=None, allow_nan=False,
                      separators=(",", ":"), ensure_ascii=False)
    try:
        return json.dumps(obj, **params)
    except (TypeError, ValueError) as error:
        LOGGER.critical(
            "The memote result structure is incompatible with the JSON "
            "standard.")
        log_json_incompatible_types(obj)
        raise_with_traceback(error)
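A quick usage sketch (assuming the module-level ``import json`` and the logging helpers that this snippet relies on):

    jsonify({"b": 1, "a": [1, 2]})               # '{"b":1,"a":[1,2]}'
    jsonify({"b": 1, "a": [1, 2]}, pretty=True)  # keys sorted, indented by 2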
Example #9
def _state_chunks(**kwargs):
    """
    Parse low data and account for multi-line formatting.
    """
    ret = []
    tmp = {}
    text = []
    contents = []
    for (key, val) in list(kwargs.items()):
        if isinstance(val, str) and '\n' in val:
            if key in ['onlyif', 'unless']:
                val = val.replace('\\\n', '')
                val = val.replace('"', ';;;')
                val = val.replace("'", '\"')
                val = val.replace(';;;', "'")
                val = ' '.join(val.split())
                tmp[key] = val
            elif key in ['text']:
                text = val.split('\n')
            elif key in ['contents']:
                contents = val.split('\n')
        else:
            tmp[key] = val
    if text:
        for line in text:
            ret += [dict(list(tmp.items()) + list({'text': line}.items()))]
    if contents:
        tmp.update({'fun': 'append'})
        for line in contents:
            ret += [dict(list(tmp.items()) + list({'contents': line}.items()))]
    else:
        ret += [tmp]
    return ret
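For illustration, a hypothetical low-data chunk: a multi-line ``contents`` value is split into one chunk per line, each tagged with ``fun: append``:

    _state_chunks(name='/etc/motd', contents='line one\nline two')
    # -> [{'name': '/etc/motd', 'fun': 'append', 'contents': 'line one'},
    #     {'name': '/etc/motd', 'fun': 'append', 'contents': 'line two'}]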
Example #10
 def to_decompartmentalized(self):
     net = MetabolicNetwork(name="decompartmentalized " + self.name,
             **self.graph)
     for rxn in self.reactions:
         # substrates as reaction attribute
         members = dict()
         for (cmpd, factor) in rxn.substrates.items():
             if isinstance(cmpd, pymet.BasicCompartmentCompound):
                 members[cmpd.compound] = factor
             else:
                 members[cmpd] = factor
         rxn.substrates = members
         # substrates in topological sense
          for cmpd in self.predecessors_iter(rxn):
             if isinstance(cmpd, pymet.BasicCompartmentCompound):
                 net.add_edge(cmpd.compound, rxn, **self.edge[cmpd][rxn].copy())
             else:
                 net.add_edge(cmpd, rxn, **self.edge[cmpd][rxn].copy())
         # products as reaction attribute
         members = dict()
         for (cmpd, factor) in rxn.products.items():
             if isinstance(cmpd, pymet.BasicCompartmentCompound):
                 members[cmpd.compound] = factor
             else:
                 members[cmpd] = factor
         rxn.products = members
         # products in topological sense
          for cmpd in self.successors_iter(rxn):
              if isinstance(cmpd, pymet.BasicCompartmentCompound):
                  net.add_edge(rxn, cmpd.compound, **self.edge[rxn][cmpd].copy())
              else:
                  net.add_edge(rxn, cmpd, **self.edge[rxn][cmpd].copy())
     self.compartments = set()
     return net
Example #11
 def __init__(self, *args, **kwargs):
      bulk_form_attrs = kwargs.pop('bulk_form_attrs', None)
     super(BulkParticipantDeleteForm, self).__init__(*args, **kwargs)
     if bulk_form_attrs is not None:
         # setup first_uid widget constraints
         self.fields["first_uid"].widget.attrs.update(
             dict(min=bulk_form_attrs["update_delete_first_uid_min_value"],
                  max=bulk_form_attrs["update_delete_first_uid_max_value"],
                  required=True))
         # setup first_uid fld validation
         self.fields["first_uid"].min_value = \
             bulk_form_attrs["update_delete_first_uid_min_value"]
         self.fields["first_uid"].max_value = \
             bulk_form_attrs["update_delete_first_uid_max_value"]
         # setup last_uid widget constraints
         self.fields["last_uid"].widget.attrs.update(
             dict(min=bulk_form_attrs["update_delete_last_uid_min_value"],
                  max=bulk_form_attrs["update_delete_last_uid_max_value"],
                  required=True))
         # setup last_uid fld validation
         self.fields["last_uid"].min_value = \
             bulk_form_attrs["update_delete_last_uid_min_value"]
         self.fields["last_uid"].max_value = \
             bulk_form_attrs["update_delete_last_uid_max_value"]
Example #12
    def __init__(self, environ):
        # noinspection PyCallByClass,PyTypeChecker
        configparser.RawConfigParser.__init__(self,
                                              defaults=None,
                                              dict_type=OrderedDict)

        found_conf = False
        conf_files = ["/usr/local/etc/automx.conf", "/etc/automx.conf"]

        conf = None
        for conf in conf_files:
            if os.path.exists(conf):
                found_conf = True
                break

        if not found_conf:
            raise ConfigNotFoundException("No configuration files found: "
                                          "%s, %s" %
                                          (conf_files[0], conf_files[1]))
        self.read(conf)

        if not self.has_section("automx"):
            raise Exception("Missing section 'automx'")

        if self.has_option("automx", "logfile"):
            self.logfile = self.get("automx", "logfile")
        else:
            self.logfile = None

        if self.has_option("automx", "debug"):
            self.debug = self.getboolean("automx", "debug")
        else:
            self.debug = False

        # We need a home directory for the OpenSSL-rand file
        if self.has_option("automx", "homedir"):
            os.environ["HOME"] = self.get("automx", "homedir")
        else:
            os.environ["HOME"] = "/var/automx"

        self.memcache = Memcache(self, environ)

        # defaults
        self.__emailaddress = ""
        self.__cn = ""
        self.__password = ""
        self.__search_domain = ""
        self.__automx = dict()

        # domain individual settings (overwrites some or all defaults)
        self.__domain = OrderedDict()

        # if we use dynamic backends, we might earn variables
        self.__vars = dict()
Example #13
 def __init__(self, paranoid=False, ignore_unknown=True):
     super(GlobalContext, self).__init__()
     self.plugins = dict()
     self.paranoid = paranoid
     self.ignore_unknown = ignore_unknown
     self.eval_rules = dict()
     self.cmds = dict()
     self.dns = pad.dns_interface.DNSInterface()
     self.networks = pad.networks.NetworkList()
     self.conf = pad.conf.PADConf(self)
     self.username = getpass.getuser()
Example #14
 def _common_hypernyms(self, other):
     '''Helper method for common_hypernyms.'''
     if not isinstance(other, Synset):
         return dict()
     self_dists  = dict(self.hypernym_distances)
     other_dists = dict(other.hypernym_distances)
     common      = dict((synset, 0) for synset in (set(self_dists) &
                                                   set(other_dists)))
     # update the distance values
     for synset in common:
         common[synset] = self_dists[synset] + other_dists[synset]
     return common
Example #15
def _grb___init__(self, name):
    self._model = self._grb.Model(name)
    self._rxn2var = dict()
    self._var2rxn = dict()
    self._rev2var = dict()
    self._var2rev = dict()
    self._cmpd2cnstrnt = dict()
    self._cnstrnt2cmpd = dict()
    self._sources = dict()
    self._drains = dict()
    self._objective = dict()
    self._tmp_lb = dict()
Example #16
    def summarize_annotation(self, annotation, doc):
        wc = len([w for w in doc if re.match(r'\w+', w)])
        ctr = Counter(list(self._flatten_list_of_sets(annotation)))

        # Convert to percentiles
        summary = {k: float(v)/float(wc) for (k,v) in dict(ctr).items()}

        # Set keys that did not occur to 0
        not_counted = { k: 0.0 for k in
                self._parser_keys() - set(summary.keys()) }

        # Merge the two dictionaries
        return dict(ChainMap(summary, not_counted))
Example #17
 def __init__(self, pattern, words=[], map={}):
     """ Search result returned from Pattern.match(sentence),
         containing a sequence of Word objects.
     """
     self.pattern = pattern
     self.words = words
     self._map1 = dict() # Word index to Constraint.
     self._map2 = dict() # Constraint index to list of Word indices.
     for w in self.words:
         self._map1[w.index] = map[w.index]
     for k, v in self._map1.items():
         self._map2.setdefault(self.pattern.sequence.index(v), []).append(k)
     for k, v in self._map2.items():
         v.sort()
Example #18
 def draw(self, filename, output_format="pdf", layout_program="fdp", layout_args=""):
     import pygraphviz as pgv
     net = pgv.AGraph(directed=True, name=filename, strict=False)
      node_attr = dict()
      link_attr = dict()
      indices = dict()
      # add reaction nodes
      for (i, rxn) in enumerate(self.nodes_iter()):
          indices[rxn] = i
          net.add_node(i, label=str(rxn), shape="box", **node_attr)
      # add links
      for (u, v) in self.edges_iter():
          net.add_edge(indices[u], indices[v], **link_attr)
     filename = "%s.%s" % (filename, output_format)
     net.draw(filename, prog=layout_program, args=layout_args)
Example #19
def get_sys_info():
    """Returns system information as a dict."""
    blob = dict()
    blob["OS"] = platform.system()
    blob["OS-release"] = platform.release()
    blob["Python"] = platform.python_version()
    return blob
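The snippet assumes a module-level ``import platform``; a sample call (the values shown are only illustrative and depend on the host):

    get_sys_info()
    # -> {'OS': 'Linux', 'OS-release': '5.15.0-generic', 'Python': '3.11.4'}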
Example #20
    def map_register_name(self, reg):
        if not self._register_names:
            # Fetch register names ...
            result = self._gdb.sync_cmd(["-data-list-register-names"], "done")
            self._register_names = {name: index
                                    for index, name in enumerate(result["register-names"])
                                    if name != ""}

        return self._register_names[reg]
Example #21
def json_force(value):
    if isinstance(value, str):
        return json.loads(value)
    elif isinstance(value, dict):
        return value
    else:
        return dict(value)
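All three accepted input shapes end up as the same dictionary (assuming ``json`` is imported at module level):

    json_force('{"a": 1}') == json_force({"a": 1}) == json_force([("a", 1)]) == {"a": 1}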
Example #22
def _parse_xml_tag(tag):
    """Parse an xml tag and return a dict describing it.

    :return: a dict with following keys:
    - is_element:   False for processing instructions, comments, etc.
    - is_opening:   True if tag is element opening tag, False otherwise
    - is_empty:     True if tag is empty element, False otherwise
    - element:      element name (None if not is_element)
    - attributes:   a dict with a key-value pair for each xml attribute
    If parsing fails somehow, return value is None.
    """
    element_regex = re.compile(r'(\w+)', re.U)
    attribute_regex = re.compile(r'''(\w+)\s*=\s*(['"])(.+?)(?<!\\)\2''', re.U)
    tag_description = {
        'is_element': False,
        'is_opening': False,
        'is_empty': False,
        'element': None,
        'attributes': dict(),
    }
    if tag[1] == '!' or tag[1] == '?':
        return tag_description
    elem = re.search(element_regex, tag)
    if elem:
        tag_description['is_element'] = True
        tag_description['element'] = elem.group(1)
        for attr in re.finditer(attribute_regex, tag):
            tag_description['attributes'][attr.group(1)] = attr.group(3)
        if tag[1] != '/':
            tag_description['is_opening'] = True
        if tag[-2] == '/':
            tag_description['is_empty'] = True
        return tag_description
    return None
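Two hypothetical tags, just to show the returned structure:

    _parse_xml_tag('<book id="42">')
    # -> {'is_element': True, 'is_opening': True, 'is_empty': False,
    #     'element': 'book', 'attributes': {'id': '42'}}
    _parse_xml_tag('<br/>')
    # -> {'is_element': True, 'is_opening': True, 'is_empty': True,
    #     'element': 'br', 'attributes': {}}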
Example #23
 def render_shell(self):
     sources = self.render_sources()
     match = self._target_pattern.match(self.target)
     target_namespace = dict(TARGET=self.target, SOURCES=sources, MATCH=match)
     return render_template(
             self.SHELL, target_namespace, default_namespace=self.action_namespace
     )
Example #24
 def how_NOT_to_do_it_get_stats_from_broken_ml_segment(self, sm):
     """
     This is left in the code to demonstrate, why after changing the minimal spanning tree,
     we must call sm.load_sampled_elems again. 
     Assigning the stat to the newly added multiloop segment is NOT enough!!!
     """
     sm_copy=copy.deepcopy(sm)
     junction_nodes = set( x for x in sm.bg.find_bulge_loop("m1", 200) if x[0]=="m" )
     missing_nodes = junction_nodes - sm.bg.mst
     missing_node, = missing_nodes
     #Changing the minimal spanning tree:
     sm.bg.mst.remove("m1")        
     sm.bg.mst |= missing_nodes
     sm.bg.build_order = None #No longer valid
     sm.bg.ang_types = None
     build_order = sm.bg.traverse_graph()
     for bo in build_order:
         if bo[1]==missing_node:
             missing_stat=sm.bg.get_bulge_angle_stats_core(missing_node,(bo[0],bo[2]))
             break
     sm.elem_defs[missing_node]=missing_stat
     sm.bg.sampled = dict()
     del sm.elem_defs["m1"]
     sm.traverse_and_build(start='start')
     assertModelsEqual(sm_copy, sm, 11, ignore_keys=["build_order", "mst", "sampled", "elem_defs"]) #sm has different mst and the stats for the missing element from the new mst
Example #25
    def __init__(self, biogrid_file=None, physical_only=True):
        self.statements = []
        self.physical_only = physical_only

        # If a path to the file is included, process it, skipping the header
        if biogrid_file:
            rows = read_unicode_csv(biogrid_file, '\t', skiprows=1)
        # If no file is provided, download from web
        else:
            logger.info('No data file specified, downloading from BioGrid '
                        'at %s' % biogrid_file_url)
            rows = _download_biogrid_data(biogrid_file_url)

        # Process the rows into Statements
        for row in rows:
            filt_row = [None if item == '-' else item for item in row]
            bg_row = _BiogridRow(*filt_row)
            # Filter out non-physical interactions if desired
            if self.physical_only and bg_row.exp_system_type != 'physical':
                continue
            # Ground agents
            agent_a = self._make_agent(bg_row.entrez_a, bg_row.syst_name_a)
            agent_b = self._make_agent(bg_row.entrez_b, bg_row.syst_name_b)
            # Skip any agents with neither HGNC grounding nor a string name
            if agent_a is None or agent_b is None:
                continue
            # Get evidence
            ev = Evidence(source_api='biogrid',
                          source_id=bg_row.biogrid_int_id,
                          pmid=bg_row.pmid,
                          text=None,
                          annotations=dict(bg_row._asdict()))
            # Make statement
            s = Complex([agent_a, agent_b], evidence=ev)
            self.statements.append(s)
Example #26
    def __init__(this, mbus, base_addr, writeback=False, \
                                        log_level = logging.WARN):
        '''
        note: base_addr will need to be updated every time
        '''
        super( RegFile, this).__init__(mbus)
        this.base_addr = base_addr 

        this.log = m3_logging.getLogger( type(this).__name__)
        this.log.setLevel(log_level)
        
        # specific ordering matching on-board gdb code
        this.names = [  'isr_lr', 'sp', 'r8', 'r9', 'r10', 'r11', 
                        'r4', 'r5', 'r6', 'r7', 'r0', 'r1', 'r2', 
                        'r3', 'r12', 'lr', 'pc', 'xpsr', ]
        this.trans_names = { 'r13': 'sp', 'r14':'lr', 'r15':'pc'}
        # The M0 does not include floating-point registers
        this.warn_names = [ 'f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 
                            'fps', ]
        this.warn_trans_names = { 'cpsr':'xpsr' }                            
        this.offsets = dict(zip(this.names,
                                range(0, 4 * len(this.names), 4)))
        this.writeback = writeback
        this.local =  {}                                
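For reference, the resulting ``this.offsets`` table pairs each register name with its byte offset into the on-board register file (first entries shown):

    # this.offsets == {'isr_lr': 0, 'sp': 4, 'r8': 8, 'r9': 12, 'r10': 16, ...}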
Example #27
    def get_protein_refs(self, hms_lincs_id):
        """Get the refs for a protein from the LINCs protein metadata.

        Parameters
        ----------
        hms_lincs_id : str
            The HMS LINCS ID for the protein

        Returns
        -------
        dict
            A dictionary of protein references.
        """
        # TODO: We could get phosphorylation states from the protein data.
        refs = {'HMS-LINCS': hms_lincs_id}

        entry = self._get_entry_by_id(self._prot_data, hms_lincs_id)
        # If there is no entry for this ID
        if not entry:
            return refs
        mappings = dict(egid='Gene ID', up='UniProt ID')
        for k, v in mappings.items():
            if entry.get(v):
                refs[k.upper()] = entry.get(v)
        return refs
Example #28
import operator  # needed by operator.itemgetter below


def build_alphabet(spectrum, m):
    """

    :param spectrum: an experimental spectrum
    :param m: the multiplicity threshold
    :return: a convolution spectrum, trimmed to contain only peptide masses appearing m times or more.
    """

    convolutions = dict()

    for i in range(len(spectrum)):
        for j in range(i + 1, len(spectrum)):
            diff = spectrum[j] - spectrum[i]
            if 57 <= diff <= 200:
                convolutions[diff] = convolutions.get(diff, 0) + 1

    sorted_list = sorted(convolutions.items(), key=operator.itemgetter(1), reverse=True)

    score_to_beat = sorted_list[m - 1][1]

    result = []

    for item in sorted_list:
        if item[1] >= score_to_beat:
            result.append(item[0])

    return result
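A small worked example with a made-up spectrum (not real data): with m=2, only the mass differences that occur at least as often as the 2nd most frequent one survive.

    build_alphabet([0, 57, 118, 186, 243], 2)
    # convolution counts: {57: 2, 118: 1, 186: 2, 61: 1, 129: 1, 68: 1, 125: 1}
    # -> [57, 186]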
Example #29
    def get_small_molecule_refs(self, hms_lincs_id):
        """Get the id refs of a small molecule from the LINCS sm metadata.

        Parameters
        ----------
        hms_lincs_id : str
            The HMS LINCS ID of the small molecule.

        Returns
        -------
        dict
            A dictionary of references.
        """
        refs = {'HMS-LINCS': hms_lincs_id}

        entry = self._get_entry_by_id(self._sm_data, hms_lincs_id)
        # If there is no entry for this ID
        if not entry:
            return refs

        # If there is an entry then fill up the refs with existing values
        mappings = dict(chembl='ChEMBL ID', chebi='ChEBI ID',
                        pubchem='PubChem CID', lincs='LINCS ID')
        for k, v in mappings.items():
            if entry.get(v):
                refs[k.upper()] = entry.get(v)
        return refs
Example #30
    def add_multiple_descriptions(self, data, item, overwrite=False,
                                  summary=None):
        """
        Add multiple descriptions to the item in one edit.

        @param data: dictionary of language-description pairs
        @param item: the item to which the descriptions should be added
        @param overwrite: whether any pre-existing descriptions should be
            overwritten (when a new description is available in that language).
        @param summary: summary to append to auto-generated edit summary
        """
        item.exists()  # load contents

        summary = summary or self.edit_summary
        edit_summary = u'Added [{{lang}}] description to [[{qid}]]'.format(
            qid=item.title())
        if summary:
            edit_summary = u'{0}, {1}'.format(edit_summary, summary)

        new_descriptions = dict()

        for lang, desc in data.items():
            if (not item.descriptions or
                    lang not in item.descriptions or
                    overwrite):
                new_descriptions[lang] = desc

        if new_descriptions:
            edit_summary = edit_summary.format(
                lang=', '.join(sorted(new_descriptions.keys())))
            item.editDescriptions(new_descriptions, summary=edit_summary)
            pywikibot.output(edit_summary)
Example #31
    def _partition_query_helper(self, multi_use, w_txn, size=None, max_partitions=None):
        from google.protobuf.struct_pb2 import Struct
        from google.cloud.spanner_v1 import Partition
        from google.cloud.spanner_v1 import PartitionOptions
        from google.cloud.spanner_v1 import PartitionQueryRequest
        from google.cloud.spanner_v1 import PartitionResponse
        from google.cloud.spanner_v1 import Transaction
        from google.cloud.spanner_v1 import TransactionSelector
        from google.cloud.spanner_v1._helpers import _make_value_pb

        new_txn_id = b"ABECAB91"
        token_1 = b"FACE0FFF"
        token_2 = b"BADE8CAF"
        response = PartitionResponse(
            partitions=[
                Partition(partition_token=token_1),
                Partition(partition_token=token_2),
            ],
            transaction=Transaction(id=new_txn_id),
        )
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.partition_query.return_value = response
        session = _Session(database)
        derived = self._makeDerived(session)
        derived._multi_use = multi_use
        if w_txn:
            derived._transaction_id = TXN_ID

        tokens = list(
            derived.partition_query(
                SQL_QUERY_WITH_PARAM,
                PARAMS,
                PARAM_TYPES,
                partition_size_bytes=size,
                max_partitions=max_partitions,
            )
        )

        self.assertEqual(tokens, [token_1, token_2])

        expected_params = Struct(
            fields={key: _make_value_pb(value) for (key, value) in PARAMS.items()}
        )

        expected_txn_selector = TransactionSelector(id=TXN_ID)

        expected_partition_options = PartitionOptions(
            partition_size_bytes=size, max_partitions=max_partitions
        )

        expected_request = PartitionQueryRequest(
            session=self.SESSION_NAME,
            sql=SQL_QUERY_WITH_PARAM,
            transaction=expected_txn_selector,
            params=expected_params,
            param_types=PARAM_TYPES,
            partition_options=expected_partition_options,
        )
        api.partition_query.assert_called_once_with(
            request=expected_request,
            metadata=[("google-cloud-resource-prefix", database.name)],
        )

        self.assertSpanAttributes(
            "CloudSpanner.PartitionReadWriteTransaction",
            status=StatusCanonicalCode.OK,
            attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY_WITH_PARAM}),
        )
Example #32
    def normalize(self):
        """normalize the dictionary representation of the workflow"""

        # modify from this:
        #
        #   "workflow": {
        #     "test-and-deploy": {
        #       "resolves": "deploy"
        #     }
        #   }
        #
        # to this:
        #
        #   "workflow": {
        #     "name": "test-and-deploy",
        #     "on": "push",
        #     "resolves": "deploy"
        #   }
        for wf_name, wf_block in dict(self.wf['workflow']).items():
            self.wf['name'] = wf_name
            self.wf['on'] = wf_block.get('on', 'push')
            self.wf['resolves'] = wf_block['resolves']

        # python 2 to 3 compatibility
        try:
            basestring
        except UnboundLocalError:
            basestring = str

        # create a list for all attributes that can be either string or list
        if isinstance(self.wf['resolves'], basestring):
            self.wf['resolves'] = [self.wf['resolves']]
        elif not self.is_list_of_strings(self.wf['resolves']):
            pu.fail('[resolves] must be a list of strings or a string\n')
        if not isinstance(self.wf['on'], basestring):
            pu.fail('[on] attribute must be a string\n')
        for _, a_block in self.wf['action'].items():
            if not isinstance(a_block['uses'], basestring):
                pu.fail('[uses] attribute must be a string\n')
            if a_block.get('needs', None):
                if isinstance(a_block['needs'], basestring):
                    a_block['needs'] = [a_block['needs']]
                elif not self.is_list_of_strings(a_block['needs']):
                    pu.fail(
                        '[needs] attribute must be a list of strings \
                        or a string\n')
            if a_block.get('runs', None):
                if isinstance(a_block['runs'], basestring):
                    a_block['runs'] = [a_block['runs']]
                elif not self.is_list_of_strings(a_block['runs']):
                    pu.fail(
                        '[runs] attribute must be a list of strings \
                        or a string\n')
            if a_block.get('args', None):
                if isinstance(a_block['args'], basestring):
                    a_block['args'] = a_block['args'].split()
                elif not self.is_list_of_strings(a_block['args']):
                    pu.fail(
                        '[args] attribute must be a list of strings \
                        or a string\n')
            if a_block.get('env', None):
                if not isinstance(a_block['env'], dict):
                    pu.fail('[env] attribute must be a dict\n')
            if a_block.get('secrets', None):
                if not self.is_list_of_strings(a_block['secrets']):
                    pu.fail('[secrets] attribute must be a list of strings\n')
Example #33
def grid_values(grid):
    """Convert grid into a dict of {square: char} with '0' or '.' for empties"""
    chars = [c for c in grid if c in digits or c in '0.']
    assert len(chars) == 81
    return dict(zip(squares, chars))
Example #34
        return (s)


# TODO: reuse inverse dict function and fuzzy_get from pugnlp
POS_LABELS = {
    '0': '1',
    'False': 'True',
    'F': 'T',
    'No': 'Yes',
    'N': 'P',
    'None': 'Positive',
    'Neg': 'Pos',
    'Negative': 'Positive',
    "A": "B"
}
POS_LABELS_INVERSE = dict((v, k) for k, v in viewitems(POS_LABELS))
POS_LABELS_LOWER = dict(
    (k.lower(), v.lower()) for k, v in viewitems(POS_LABELS))
POS_LABELS_LOWER_INVERSE = dict(
    (v.lower(), k.lower()) for k, v in viewitems(POS_LABELS))
POS_LABELS_LOWER_FIRST = dict(
    (k.lower()[0], v.lower()[0]) for k, v in viewitems(POS_LABELS))
POS_LABELS_LOWER_INVERSE_FIRST = dict(
    (v.lower()[0], k.lower()[0]) for k, v in viewitems(POS_LABELS))
# POS_LABELS_ALL = OrderedDict(list(viewitems(POS_LABELS)) + list(POS_LABELS_LOWER.iteritems())


def infer_pos_label(neg_label=None):
    """Try to guess a positive classification label from a negative label
Example #35
    def _partition_read_helper(
        self, multi_use, w_txn, size=None, max_partitions=None, index=None
    ):
        from google.cloud.spanner_v1.keyset import KeySet
        from google.cloud.spanner_v1 import Partition
        from google.cloud.spanner_v1 import PartitionOptions
        from google.cloud.spanner_v1 import PartitionReadRequest
        from google.cloud.spanner_v1 import PartitionResponse
        from google.cloud.spanner_v1 import Transaction
        from google.cloud.spanner_v1 import TransactionSelector

        keyset = KeySet(all_=True)
        new_txn_id = b"ABECAB91"
        token_1 = b"FACE0FFF"
        token_2 = b"BADE8CAF"
        response = PartitionResponse(
            partitions=[
                Partition(partition_token=token_1),
                Partition(partition_token=token_2),
            ],
            transaction=Transaction(id=new_txn_id),
        )
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.partition_read.return_value = response
        session = _Session(database)
        derived = self._makeDerived(session)
        derived._multi_use = multi_use
        if w_txn:
            derived._transaction_id = TXN_ID

        tokens = list(
            derived.partition_read(
                TABLE_NAME,
                COLUMNS,
                keyset,
                index=index,
                partition_size_bytes=size,
                max_partitions=max_partitions,
            )
        )

        self.assertEqual(tokens, [token_1, token_2])

        expected_txn_selector = TransactionSelector(id=TXN_ID)

        expected_partition_options = PartitionOptions(
            partition_size_bytes=size, max_partitions=max_partitions
        )

        expected_request = PartitionReadRequest(
            session=self.SESSION_NAME,
            table=TABLE_NAME,
            columns=COLUMNS,
            key_set=keyset._to_pb(),
            transaction=expected_txn_selector,
            index=index,
            partition_options=expected_partition_options,
        )
        api.partition_read.assert_called_once_with(
            request=expected_request,
            metadata=[("google-cloud-resource-prefix", database.name)],
        )

        self.assertSpanAttributes(
            "CloudSpanner.PartitionReadOnlyTransaction",
            status=StatusCanonicalCode.OK,
            attributes=dict(
                BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS)
            ),
        )
Example #36
def text_to_thai_num(text):
    """รับค่า ''str'' คืนค่า ''str'' เป็นข้อความสู่เลขไทย"""
    thaitonum = dict((x[0], x[2]) for x in p[1:])
    return thaitonum[text]
Example #37
def num_to_text(text):
    """รับค่า ''str'' คืนค่า ''str'' เป็นเลขสู่ข้อความ"""
    thaitonum = dict((x[1], x[0]) for x in p[1:])
    return thaitonum[text]
Example #38
def num_to_thai_num(text):
    """รับค่า ''str'' คืนค่า ''str'' เป็นเลขสู่เลขไทย"""
    thaitonum = dict((x[1], x[2]) for x in p[1:])
    return thaitonum[text]
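The three helpers above all index into a module-level table ``p`` whose rows are (Thai number word, Arabic numeral, Thai numeral) and whose first row is a header that gets skipped; an illustrative stand-in (not the library's actual table):

    p = [
        ('word', 'number', 'thai_number'),
        ('ศูนย์', '0', '๐'),
        ('หนึ่ง', '1', '๑'),
        ('สอง', '2', '๒'),
    ]
    # num_to_thai_num('2') -> '๒';  text_to_thai_num('หนึ่ง') -> '๑'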
Example #39
def cross(A, B):
    """Cross product of elements in A and elements in B."""
    return [a + b for a in A for b in B]


digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits
squares = cross(rows, cols)
unit_list = ([cross(rows, c)
              for c in cols] + [cross(r, cols) for r in rows] + [
                  cross(rs, cs) for rs in ('ABC', 'DEF', 'GHI')
                  for cs in ('123', '456', '789')
              ])

units = dict((s, [u for u in unit_list if s in u]) for s in squares)

peers = dict((s, set(sum(units[s], [])) - set([s])) for s in squares)


def test():
    """A set of unit tests"""
    assert len(squares) == 81
    assert len(unit_list) == 27
    assert all(len(units[s]) == 3 for s in squares)
    assert all(len(peers[s]) == 20 for s in peers)

    assert units['C2'] == [[
        'A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2'
    ], ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8',
        'C9'], ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']]
Example #40
 def set_img_to_eval_imgs(self, scores, img_ids, method):
     for img_id, score in zip(img_ids, scores):
         if img_id not in self.img_to_eval:
             self.img_to_eval[img_id] = dict()
             self.img_to_eval[img_id]["image_id"] = img_id
         self.img_to_eval[img_id][method] = score
Example #41
 def __init__(self, img_ids, coco, coco_res):
     self.eval_imgs = []
     self.eval = dict()
     self.img_to_eval = dict()
     self.coco = coco
     self.coco_res = coco_res
Example #42
 def clear_matches(self):
     """Clear any already checked rules."""
     self.rules_checked = dict()
     self.score = 0
Example #43
def getTimeseries(productcode, subproductcode, version, mapsetcode, wkt,
                  start_date, end_date, aggregate):

    #    Extract timeseries from a list of files and return as JSON object
    #    It applies to a single dataset (prod/sprod/version/mapset) and between 2 dates
    #    Several types of aggregation foreseen:
    #
    #       mean :      Sum(Xi)/N(Xi)        -> min/max not considered          e.g. Rain/Vegetation
    #       cumulate:   Sum(Xi)              -> min/max not considered          e.g. Active Fires
    #
    #       count:      N(Xi where min < Xi < max)                              e.g. Active Fires (not used so far)
    #       surface:    count * PixelArea                                       e.g. Water Bodies
    #       percent:    count/Ntot                                              e.g. Vegetation anomalies (not used so far)
    #
    #   History: 1.0 :  Initial release - since 2.0.1 -> now renamed '_green' from greenwich package
    #            1.1 :  Since Feb. 2017, it is based on a different approach (gdal.RasterizeLayer instead of greenwich)
    #                   in order to solve the issue with MULTIPOLYGON
    #

    # Convert the wkt into a geometry
    ogr.UseExceptions()
    theGeomWkt = ' '.join(wkt.strip().split())
    # geom = Geometry(wkt=str(theGeomWkt), srs=4326)
    geom = ogr.CreateGeometryFromWkt(str(theGeomWkt))

    # Get Mapset Info
    mapset_info = querydb.get_mapset(mapsetcode=mapsetcode)

    # Prepare for computing conversion to area: the pixel size at Lat=0 is computed
    # The correction to the actual latitude (on AVERAGE value - will be computed below)
    const_d2km = 12364.35
    area_km_equator = abs(float(mapset_info.pixel_shift_lat)) * abs(
        float(mapset_info.pixel_shift_long)) * const_d2km

    # Get Product Info
    product_info = querydb.get_product_out_info(productcode=productcode,
                                                subproductcode=subproductcode,
                                                version=version)
    if len(product_info) > 0:
        # Get info from product_info
        scale_factor = 0
        scale_offset = 0
        nodata = 0
        date_format = ''
        for row in product_info:
            scale_factor = row.scale_factor
            scale_offset = row.scale_offset
            nodata = row.nodata
            date_format = row.date_format
            date_type = row.data_type_id

        # Create an output/temp shapefile, for managing the output layer (really mandatory ?? Can be simplified ???)
        try:
            tmpdir = tempfile.mkdtemp(prefix=__name__,
                                      suffix='_getTimeseries',
                                      dir=es_constants.base_tmp_dir)
        except Exception:
            logger.error('Cannot create temporary dir ' +
                         es_constants.base_tmp_dir + '. Exit')
            raise NameError('Error in creating tmpdir')

        out_shape = tmpdir + os.path.sep + "output_shape.shp"
        outDriver = ogr.GetDriverByName('ESRI Shapefile')

        # Create the output shapefile
        outDataSource = outDriver.CreateDataSource(out_shape)
        dest_srs = ogr.osr.SpatialReference()
        dest_srs.ImportFromEPSG(4326)

        outLayer = outDataSource.CreateLayer("Layer", dest_srs)
        # outLayer = outDataSource.CreateLayer("Layer")
        idField = ogr.FieldDefn("id", ogr.OFTInteger)
        outLayer.CreateField(idField)

        featureDefn = outLayer.GetLayerDefn()
        feature = ogr.Feature(featureDefn)
        feature.SetGeometry(geom)
        feature.SetField("id", 1)
        outLayer.CreateFeature(feature)
        feature = None

        [list_files,
         dates_list] = getFilesList(productcode, subproductcode, version,
                                    mapsetcode, date_format, start_date,
                                    end_date)

        # Build a dictionary with filenames/dates
        dates_to_files_dict = dict(list(zip(dates_list, list_files)))

        # Generate unique list of files
        unique_list = set(list_files)
        uniqueFilesValues = []

        geo_mask_created = False
        for infile in unique_list:
            single_result = {
                'filename': '',
                'meanvalue_noscaling': nodata,
                'meanvalue': None
            }

            if infile.strip() != '' and os.path.isfile(infile):
                # try:

                # Open input file
                orig_ds = gdal.Open(infile, gdal.GA_ReadOnly)
                orig_cs = osr.SpatialReference()
                orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
                orig_geoT = orig_ds.GetGeoTransform()
                x_origin = orig_geoT[0]
                y_origin = orig_geoT[3]
                pixel_size_x = orig_geoT[1]
                pixel_size_y = -orig_geoT[5]

                in_data_type_gdal = conv_data_type_to_gdal(date_type)

                # Create a mask from the geometry, with the same georef as the input file[s]
                if not geo_mask_created:

                    # Read polygon extent and round to raster resolution
                    x_min, x_max, y_min, y_max = outLayer.GetExtent()
                    x_min_round = int(old_div(
                        (x_min - x_origin),
                        pixel_size_x)) * pixel_size_x + x_origin
                    x_max_round = (
                        int(old_div(
                            (x_max - x_origin),
                            (pixel_size_x))) + 1) * pixel_size_x + x_origin
                    y_min_round = (
                        int(old_div(
                            (y_min - y_origin),
                            (pixel_size_y))) - 1) * pixel_size_y + y_origin
                    y_max_round = int(
                        old_div((y_max - y_origin),
                                (pixel_size_y))) * pixel_size_y + y_origin
                    #
                    #     # Create the destination data source
                    x_res = int(
                        round(
                            old_div((x_max_round - x_min_round),
                                    pixel_size_x)))
                    y_res = int(
                        round(
                            old_div((y_max_round - y_min_round),
                                    pixel_size_y)))
                    #
                    #     # Create mask in memory
                    mem_driver = gdal.GetDriverByName('MEM')
                    mem_ds = mem_driver.Create('', x_res, y_res, 1,
                                               in_data_type_gdal)
                    mask_geoT = [
                        x_min_round, pixel_size_x, 0, y_max_round, 0,
                        -pixel_size_y
                    ]
                    mem_ds.SetGeoTransform(mask_geoT)
                    mem_ds.SetProjection(orig_cs.ExportToWkt())
                    #
                    #     # Create a Layer with '1' for the pixels to be selected
                    gdal.RasterizeLayer(mem_ds, [1], outLayer, burn_values=[1])
                    # gdal.RasterizeLayer(mem_ds, [1], outLayer, None, None, [1])

                    # Read the polygon-mask
                    band = mem_ds.GetRasterBand(1)
                    geo_values = mem_ds.ReadAsArray()

                    # Create a mask from geo_values (mask-out the '0's)
                    geo_mask = ma.make_mask(geo_values == 0)
                    geo_mask_created = True
                    #
                    #     # Clean/Close objects
                    mem_ds = None
                    mem_driver = None
                    outDriver = None
                    outLayer = None

                # Read data from input file
                x_offset = int(old_div((x_min - x_origin), pixel_size_x))
                y_offset = int(old_div((y_origin - y_max), pixel_size_y))

                band_in = orig_ds.GetRasterBand(1)
                data = band_in.ReadAsArray(x_offset, y_offset, x_res, y_res)
                #   Catch the Error ES2-105 (polygon not included in Mapset)
                if data is None:
                    logger.error(
                        'ERROR: polygon extends out of file mapset for file: %s'
                        % infile)
                    return []

                # Create a masked array from the data (considering Nodata)
                masked_data = ma.masked_equal(data, nodata)

                # Apply on top of it the geo mask
                mxnodata = ma.masked_where(geo_mask, masked_data)

                # Test ONLY
                # write_ds_to_geotiff(mem_ds, '/data/processing/exchange/Tests/mem_ds.tif')

                if aggregate['aggregation_type'] in ('count', 'percent',
                                                     'surface', 'precip'):

                    if mxnodata.count() == 0:
                        meanResult = None
                    else:
                        mxrange = mxnodata
                        min_val = aggregate['aggregation_min']
                        max_val = aggregate['aggregation_max']

                        if min_val is not None:
                            min_val_scaled = old_div((min_val - scale_offset),
                                                     scale_factor)
                            mxrange = ma.masked_less(mxnodata, min_val_scaled)

                            # See ES2-271
                            if max_val is not None:
                                # Scale threshold from physical to digital value
                                max_val_scaled = old_div(
                                    (max_val - scale_offset), scale_factor)
                                mxrange = ma.masked_greater(
                                    mxrange, max_val_scaled)

                        elif max_val is not None:
                            # Scale threshold from physical to digital value
                            max_val_scaled = old_div((max_val - scale_offset),
                                                     scale_factor)
                            mxrange = ma.masked_greater(
                                mxnodata, max_val_scaled)

                        if aggregate['aggregation_type'] == 'percent':
                            # 'percent'
                            meanResult = float(mxrange.count()) / float(
                                mxnodata.count()) * 100

                        elif aggregate['aggregation_type'] == 'surface':
                            # 'surface'
                            # Estimate 'average' Latitude
                            y_avg = (y_min + y_max) / 2.0
                            pixelAvgArea = area_km_equator * math.cos(
                                old_div(y_avg, 180) * math.pi)
                            # This is applicable/important for the WD_GEE (between 0 to 100% -> both avg/occur)
                            # Consider the percent (%) as a weight - see ES2-271
                            # The sum() has to be used - rather than the count() - not to overestimate the avg
                            # (for occur, the line below was also ok).
                            # meanResult = float(mxrange.count()) * pixelAvgArea

                            if productcode == 'wd-gee':
                                meanResult = float(
                                    mxrange.sum() / 100.0) * pixelAvgArea
                            else:
                                meanResult = float(
                                    mxrange.count()) * pixelAvgArea

                        elif aggregate['aggregation_type'] == 'precip':
                            # 'precip'
                            # Estimate 'average' Latitude
                            y_avg = (y_min + y_max) / 2.0
                            pixelAvgArea = area_km_equator * math.cos(
                                old_div(y_avg, 180) * math.pi)
                            # This is applicable/important for the WD_GEE (between 0 to 100% -> both avg/occur)
                            # The correction factor 1E-3 is applied to have the final result in millions m3
                            # Units are: surface: km2 (1E6 m2) - precip: mm (1E-3 m) -> 1E3 m3
                            meanResult = float(
                                mxrange.sum()) * pixelAvgArea * 1e-3

                        else:
                            # 'count'
                            meanResult = float(mxrange.count())

                    # Both results are equal
                    finalvalue = meanResult

                else:  # if aggregate['type'] == 'mean' or if aggregate['type'] == 'cumulate':
                    if mxnodata.count() == 0:
                        finalvalue = None
                        meanResult = None
                    else:
                        if aggregate['aggregation_type'] == 'mean':
                            # 'mean'
                            meanResult = mxnodata.mean()
                        else:
                            # 'cumulate'
                            meanResult = mxnodata.sum()

                        finalvalue = (meanResult * scale_factor + scale_offset)

                # Assign results
                single_result['filename'] = infile
                single_result['meanvalue_noscaling'] = meanResult
                single_result['meanvalue'] = finalvalue

            else:
                logger.debug('ERROR: raster file does not exist - %s' % infile)

            uniqueFilesValues.append(single_result)

        # Define a dictionary to associate filenames/values
        files_to_values_dict = dict(
            (x['filename'], x['meanvalue']) for x in uniqueFilesValues)

        # Prepare array for result
        resultDatesValues = []

        # Returns a list of 'filenames', 'dates', 'values'
        for mydate in dates_list:

            my_result = {'date': datetime.date.today(), 'meanvalue': nodata}

            # Assign the date
            my_result['date'] = mydate
            # Assign the filename
            my_filename = dates_to_files_dict[mydate]

            # Map from array of Values
            my_result['meanvalue'] = files_to_values_dict[my_filename]

            # Map from array of dates
            resultDatesValues.append(my_result)

        try:
            shutil.rmtree(tmpdir)
        except Exception:
            logger.debug('ERROR: Error in deleting tmpdir. Exit')

        # Return result
        return resultDatesValues
    else:
        logger.debug(
            'ERROR: product not registered in the products table! - %s %s %s' %
            (productcode, subproductcode, version))
        return []


# def getTimeseries_green(productcode, subproductcode, version, mapsetcode, wkt, start_date, end_date, aggregate):
#     #    Extract timeseries from a list of files and return as JSON object
#     #    It applies to a single dataset (prod/sprod/version/mapset) and between 2 dates
#     #    Several types of aggregation foreseen:
#     #
#     #       mean :      Sum(Xi)/N(Xi)        -> min/max not considered          e.g. Rain
#     #       cumulate:   Sum(Xi)              -> min/max not considered          e.g. Fire
#     #
#     #       count:      N(Xi where min < Xi < max)                              e.g. Vegetation anomalies
#     #       surface:    count * PixelArea                                       e.g. Water Bodies
#     #       percent:    count/Ntot                                              e.g. Vegetation anomalies
#
#     ogr.UseExceptions()
#     theGeomWkt = ' '.join(wkt.strip().split())
#     geom = Geometry(wkt=str(theGeomWkt), srs=4326)
#
#     # Get Mapset Info
#     mapset_info = querydb.get_mapset(mapsetcode=mapsetcode)
#
#     # Compute pixel area by converting degree to km
#     pixelArea = abs(mapset_info.pixel_shift_lat)*abs(mapset_info.pixel_shift_lat)*12544.0
#
#     # Get Product Info
#     product_info = querydb.get_product_out_info(productcode=productcode,
#                                                 subproductcode=subproductcode,
#                                                 version=version)
#     if product_info.__len__() > 0:
#         scale_factor = 0
#         scale_offset = 0
#         nodata = 0
#         date_format = ''
#         for row in product_info:
#             scale_factor = row.scale_factor
#             scale_offset = row.scale_offset
#             nodata = row.nodata
#             unit = row.unit
#             date_format = row.date_format
#
#         [list_files, dates_list] = getFilesList(productcode, subproductcode, version, mapsetcode, date_format, start_date, end_date)
#
#         # Built a dictionary with filenames/dates
#         dates_to_files_dict = dict(zip(dates_list, list_files))
#
#         # Generate unique list of files
#         unique_list = set(list_files)
#         uniqueFilesValues = []
#
#         for infile in unique_list:
#             single_result = {'filename': '', 'meanvalue_noscaling': nodata, 'meanvalue': None}
#
#             if os.path.isfile(infile):
#                 try:
#                     mx = []
#                     with Raster(infile) as img:
#                         # Assign nodata from prod_info
#                         img._nodata = nodata
#                         with img.clip(geom) as clipped:
#                             # Save clipped image (for debug only)
#                             # clipped.save('/data/processing/exchange/clipped_test.tif')
#                             mx = clipped.array()
#
#                     nodata_array_masked = ma.masked_equal(mx, nodata)
#                     merged_mask = ma.mask_or(ma.getmask(mx), ma.getmask(nodata_array_masked))
#                     mxnodata = ma.masked_array(ma.getdata(mx), merged_mask)
#
#                     if aggregate['aggregation_type'] == 'count' or aggregate['aggregation_type'] == 'percent' or aggregate['aggregation_type'] == 'surface':
#
#                         min_val = aggregate['aggregation_min']
#                         max_val = aggregate['aggregation_max']
#                         # Scale threshold from physical to digital value
#                         min_val_scaled = (min_val-scale_offset)/scale_factor
#                         max_val_scaled = (max_val-scale_offset)/scale_factor
#                         mxrange = ma.masked_outside(mxnodata, min_val_scaled, max_val_scaled)
#
#                         if aggregate['aggregation_type'] == 'percent':
#                             # 'percent'
#                             meanResult = float(mxrange.count())/float(mxnodata.count()) * 100
#
#                         elif aggregate['aggregation_type'] == 'surface':
#                             # 'surface'
#                             meanResult = float(mxrange.count())* pixelArea
#                         else:
#                             # 'count'
#                             meanResult = float(mxrange.count())
#
#                         # Both results are equal
#                         finalvalue = meanResult
#
#                     else:   #if aggregate['type'] == 'mean' or if aggregate['type'] == 'cumulate':
#                         if mxnodata.count() == 0:
#                             meanResult = 0.0
#                         else:
#                             if aggregate['aggregation_type'] == 'mean':
#                                 # 'mean'
#                                 meanResult = mxnodata.mean()
#                             else:
#                                 # 'cumulate'
#                                 meanResult = mxnodata.sum()
#
#                         # Scale to physical value
#                         finalvalue = (meanResult*scale_factor+scale_offset)
#
#                     # Assign results
#                     single_result['filename'] = infile
#                     single_result['meanvalue_noscaling'] = meanResult
#                     single_result['meanvalue'] = finalvalue
#
#                 except Exception, e:
#                     logger.debug('ERROR: clipping - %s' % (e))
#                     # sys.exit (1)
#             else:
#                 logger.debug('ERROR: raster file does not exist - %s' % infile)
#                 # sys.exit (1)
#
#             uniqueFilesValues.append(single_result)
#
#         # Define a dictionary to associate filenames/values
#         files_to_values_dict = dict((x['filename'], x['meanvalue']) for x in uniqueFilesValues)
#
#         # Prepare array for result
#         resultDatesValues = []
#
#         # Returns a list of 'filenames', 'dates', 'values'
#         for mydate in dates_list:
#             # my_result = {'date': datetime.date.today(), 'filename':'', 'meanvalue':nodata}
#             my_result = {'date': datetime.date.today(), 'meanvalue':nodata}
#
#             # Assign the date
#             my_result['date'] = mydate
#             # Assign the filename
#             my_filename = dates_to_files_dict[mydate]
#             # my_result['filename'] = my_filename
#             # Map from array of Values
#             my_result['meanvalue'] = files_to_values_dict[my_filename]
#
#             # Map from array of dates
#             resultDatesValues.append(my_result)
#
#         return resultDatesValues
#     else:
#         logger.debug('ERROR: product not registered in the products table! - %s %s %s' % (productcode, subproductcode, version))
#         return []
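For reference, a minimal runnable sketch of how the five aggregation types described in the header comment above map onto numpy masked-array operations; the input array, scale factor/offset, thresholds and pixel area are illustrative values only, not taken from the original processing chain:

import numpy as np
import numpy.ma as ma

# Illustrative inputs (hypothetical): digital values with 0 as nodata
mxnodata = ma.masked_equal(np.array([[0, 120, 80], [200, 0, 50]]), 0)
scale_factor, scale_offset, pixelArea = 0.5, 0.0, 12.3

# count / percent / surface: restrict to a physical range, scaled to digital values
min_val_scaled = (40 - scale_offset) / scale_factor
max_val_scaled = (150 - scale_offset) / scale_factor
mxrange = ma.masked_outside(mxnodata, min_val_scaled, max_val_scaled)
count = float(mxrange.count())                        # 'count'
percent = count / float(mxnodata.count()) * 100       # 'percent'
surface = count * pixelArea                           # 'surface'

# mean / cumulate: aggregate the digital values, then scale back to physical units
mean_value = mxnodata.mean() * scale_factor + scale_offset   # 'mean'
cumulated = mxnodata.sum() * scale_factor + scale_offset     # 'cumulate'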
Ejemplo n.º 44
0
from pymongo import MongoClient
import functools
import math
import sys
try:
    import repoze.lru
except ImportError:
    pass

LONG_POS_TO_SHORT = {
    'verben': 'v',
    'nomen': 'n',
    'adj': 'j',
}

SHORT_POS_TO_LONG = dict((v, k) for (k, v) in LONG_POS_TO_SHORT.items())
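# Illustration only (not part of the original module): the comprehension above
# simply inverts the mapping, e.g.
#   LONG_POS_TO_SHORT['verben'] == 'v'
#   SHORT_POS_TO_LONG['v'] == 'verben'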

DEFAULT_CACHE_SIZE = 100

GERMANET_METAINFO_IGNORE_KEYS = set(['_id'])


class GermaNet(object):
    '''A class representing the GermaNet database.'''
    def __init__(self, mongo_db, cache_size=DEFAULT_CACHE_SIZE):
        '''
        Creates a new GermaNet object.

        Arguments:
        - `mongo_db`: a pymongo.database.Database object containing
          the GermaNet lexicon
Ejemplo n.º 45
0
    def _execute_sql_helper(
        self,
        multi_use,
        first=True,
        count=0,
        partition=None,
        sql_count=0,
        query_options=None,
        timeout=google.api_core.gapic_v1.method.DEFAULT,
        retry=google.api_core.gapic_v1.method.DEFAULT,
    ):
        from google.protobuf.struct_pb2 import Struct
        from google.cloud.spanner_v1 import (
            PartialResultSet,
            ResultSetMetadata,
            ResultSetStats,
        )
        from google.cloud.spanner_v1 import (
            TransactionSelector,
            TransactionOptions,
        )
        from google.cloud.spanner_v1 import ExecuteSqlRequest
        from google.cloud.spanner_v1 import Type, StructType
        from google.cloud.spanner_v1 import TypeCode
        from google.cloud.spanner_v1._helpers import (
            _make_value_pb,
            _merge_query_options,
        )

        VALUES = [[u"bharney", u"rhubbyl", 31], [u"phred", u"phlyntstone", 32]]
        MODE = 2  # PROFILE
        struct_type_pb = StructType(
            fields=[
                StructType.Field(name="first_name", type_=Type(code=TypeCode.STRING)),
                StructType.Field(name="last_name", type_=Type(code=TypeCode.STRING)),
                StructType.Field(name="age", type_=Type(code=TypeCode.INT64)),
            ]
        )
        metadata_pb = ResultSetMetadata(row_type=struct_type_pb)
        stats_pb = ResultSetStats(
            query_stats=Struct(fields={"rows_returned": _make_value_pb(2)})
        )
        result_sets = [
            PartialResultSet(metadata=metadata_pb),
            PartialResultSet(stats=stats_pb),
        ]
        for i in range(len(result_sets)):
            result_sets[i].values.extend(VALUES[i])
        iterator = _MockIterator(*result_sets)
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.execute_streaming_sql.return_value = iterator
        session = _Session(database)
        derived = self._makeDerived(session)
        derived._multi_use = multi_use
        derived._read_request_count = count
        derived._execute_sql_count = sql_count
        if not first:
            derived._transaction_id = TXN_ID

        result_set = derived.execute_sql(
            SQL_QUERY_WITH_PARAM,
            PARAMS,
            PARAM_TYPES,
            query_mode=MODE,
            query_options=query_options,
            partition=partition,
            retry=retry,
            timeout=timeout,
        )

        self.assertEqual(derived._read_request_count, count + 1)

        if multi_use:
            self.assertIs(result_set._source, derived)
        else:
            self.assertIsNone(result_set._source)

        self.assertEqual(list(result_set), VALUES)
        self.assertEqual(result_set.metadata, metadata_pb)
        self.assertEqual(result_set.stats, stats_pb)

        txn_options = TransactionOptions(
            read_only=TransactionOptions.ReadOnly(strong=True)
        )

        if multi_use:
            if first:
                expected_transaction = TransactionSelector(begin=txn_options)
            else:
                expected_transaction = TransactionSelector(id=TXN_ID)
        else:
            expected_transaction = TransactionSelector(single_use=txn_options)

        expected_params = Struct(
            fields={key: _make_value_pb(value) for (key, value) in PARAMS.items()}
        )

        expected_query_options = database._instance._client._query_options
        if query_options:
            expected_query_options = _merge_query_options(
                expected_query_options, query_options
            )

        expected_request = ExecuteSqlRequest(
            session=self.SESSION_NAME,
            sql=SQL_QUERY_WITH_PARAM,
            transaction=expected_transaction,
            params=expected_params,
            param_types=PARAM_TYPES,
            query_mode=MODE,
            query_options=expected_query_options,
            partition_token=partition,
            seqno=sql_count,
        )
        api.execute_streaming_sql.assert_called_once_with(
            request=expected_request,
            metadata=[("google-cloud-resource-prefix", database.name)],
            timeout=timeout,
            retry=retry,
        )

        self.assertEqual(derived._execute_sql_count, sql_count + 1)

        self.assertSpanAttributes(
            "CloudSpanner.ReadWriteTransaction",
            status=StatusCanonicalCode.OK,
            attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY_WITH_PARAM}),
        )
Ejemplo n.º 46
0
    def _read_helper(self, multi_use, first=True, count=0, partition=None):
        from google.protobuf.struct_pb2 import Struct
        from google.cloud.spanner_v1 import (
            PartialResultSet,
            ResultSetMetadata,
            ResultSetStats,
        )
        from google.cloud.spanner_v1 import (
            TransactionSelector,
            TransactionOptions,
        )
        from google.cloud.spanner_v1 import ReadRequest
        from google.cloud.spanner_v1 import Type, StructType
        from google.cloud.spanner_v1 import TypeCode
        from google.cloud.spanner_v1.keyset import KeySet
        from google.cloud.spanner_v1._helpers import _make_value_pb

        VALUES = [[u"bharney", 31], [u"phred", 32]]
        struct_type_pb = StructType(
            fields=[
                StructType.Field(name="name", type_=Type(code=TypeCode.STRING)),
                StructType.Field(name="age", type_=Type(code=TypeCode.INT64)),
            ]
        )
        metadata_pb = ResultSetMetadata(row_type=struct_type_pb)
        stats_pb = ResultSetStats(
            query_stats=Struct(fields={"rows_returned": _make_value_pb(2)})
        )
        result_sets = [
            PartialResultSet(metadata=metadata_pb),
            PartialResultSet(stats=stats_pb),
        ]
        for i in range(len(result_sets)):
            result_sets[i].values.extend(VALUES[i])
        KEYS = [["*****@*****.**"], ["*****@*****.**"]]
        keyset = KeySet(keys=KEYS)
        INDEX = "email-address-index"
        LIMIT = 20
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.streaming_read.return_value = _MockIterator(*result_sets)
        session = _Session(database)
        derived = self._makeDerived(session)
        derived._multi_use = multi_use
        derived._read_request_count = count
        if not first:
            derived._transaction_id = TXN_ID

        if partition is not None:  # 'limit' and 'partition' incompatible
            result_set = derived.read(
                TABLE_NAME, COLUMNS, keyset, index=INDEX, partition=partition
            )
        else:
            result_set = derived.read(
                TABLE_NAME, COLUMNS, keyset, index=INDEX, limit=LIMIT
            )

        self.assertEqual(derived._read_request_count, count + 1)

        if multi_use:
            self.assertIs(result_set._source, derived)
        else:
            self.assertIsNone(result_set._source)

        self.assertEqual(list(result_set), VALUES)
        self.assertEqual(result_set.metadata, metadata_pb)
        self.assertEqual(result_set.stats, stats_pb)

        txn_options = TransactionOptions(
            read_only=TransactionOptions.ReadOnly(strong=True)
        )

        if multi_use:
            if first:
                expected_transaction = TransactionSelector(begin=txn_options)
            else:
                expected_transaction = TransactionSelector(id=TXN_ID)
        else:
            expected_transaction = TransactionSelector(single_use=txn_options)

        if partition is not None:
            expected_limit = 0
        else:
            expected_limit = LIMIT

        expected_request = ReadRequest(
            session=self.SESSION_NAME,
            table=TABLE_NAME,
            columns=COLUMNS,
            key_set=keyset._to_pb(),
            transaction=expected_transaction,
            index=INDEX,
            limit=expected_limit,
            partition_token=partition,
        )
        api.streaming_read.assert_called_once_with(
            request=expected_request,
            metadata=[("google-cloud-resource-prefix", database.name)],
        )

        self.assertSpanAttributes(
            "CloudSpanner.ReadOnlyTransaction",
            attributes=dict(
                BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS)
            ),
        )
Ejemplo n.º 47
0
def _get_package_versions_and_locations():
    import warnings
    from ._auto_deps import package_imports, global_deprecation_messages, deprecation_messages, \
        runtime_warning_messages, warning_imports, ignorable

    def package_dir(srcfile):
        return os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(srcfile))))

    # pkg_resources.require returns the distribution that pkg_resources attempted to put
    # on sys.path, which can differ from the one that we actually import due to #1258,
    # or any other bug that causes sys.path to be set up incorrectly. Therefore we
    # must import the packages in order to check their versions and paths.

    # This is to suppress all UserWarnings and various DeprecationWarnings and RuntimeWarnings
    # (listed in _auto_deps.py).

    warnings.filterwarnings("ignore", category=UserWarning, append=True)

    for msg in global_deprecation_messages + deprecation_messages:
        warnings.filterwarnings("ignore", category=DeprecationWarning, message=msg, append=True)
    for msg in runtime_warning_messages:
        warnings.filterwarnings("ignore", category=RuntimeWarning, message=msg, append=True)
    try:
        for modulename in warning_imports:
            try:
                __import__(modulename)
            except (ImportError, SyntaxError):
                pass
    finally:
        # Leave suppressions for UserWarnings and global_deprecation_messages active.
        for _ in runtime_warning_messages + deprecation_messages:
            warnings.filters.pop()

    packages = []
    pkg_resources_vers_and_locs = dict()

    if not hasattr(sys, 'frozen'):
        pkg_resources_vers_and_locs = {
            p.project_name.lower(): (str(p.version), p.location)
            for p
            in pkg_resources.require(_INSTALL_REQUIRES)
        }

    def get_version(module):
        if hasattr(module, '__version__'):
            return str(getattr(module, '__version__'))
        elif hasattr(module, 'version'):
            ver = getattr(module, 'version')
            if isinstance(ver, tuple):
                return '.'.join(map(str, ver))
            else:
                return str(ver)
        else:
            return 'unknown'

    for pkgname, modulename in [(__appname__, 'allmydata')] + package_imports:
        if modulename:
            try:
                __import__(modulename)
                module = sys.modules[modulename]
            except (ImportError, SyntaxError):
                etype, emsg, etrace = sys.exc_info()
                trace_info = (etype, str(emsg), ([None] + traceback.extract_tb(etrace))[-1])
                packages.append( (pkgname, (None, None, trace_info)) )
            else:
                comment = None
                if pkgname == __appname__:
                    comment = "%s: %s" % (branch, full_version)
                elif pkgname == 'setuptools' and hasattr(module, '_distribute'):
                    # distribute does not report its version in any module variables
                    comment = 'distribute'
                ver = get_version(module)
                loc = package_dir(module.__file__)
                if ver == "unknown" and pkgname in pkg_resources_vers_and_locs:
                    (pr_ver, pr_loc) = pkg_resources_vers_and_locs[pkgname]
                    if loc == os.path.normcase(os.path.realpath(pr_loc)):
                        ver = pr_ver
                packages.append( (pkgname, (ver, loc, comment)) )
        elif pkgname == 'python':
            packages.append( (pkgname, (platform.python_version(), sys.executable, None)) )
        elif pkgname == 'platform':
            packages.append( (pkgname, (_get_platform(), None, None)) )
        elif pkgname == 'OpenSSL':
            packages.append( (pkgname, _get_openssl_version()) )

    cross_check_errors = []

    if len(pkg_resources_vers_and_locs) > 0:
        imported_packages = set([p.lower() for (p, _) in packages])
        extra_packages = []

        for pr_name, (pr_ver, pr_loc) in pkg_resources_vers_and_locs.items():
            if pr_name not in imported_packages and pr_name not in ignorable:
                extra_packages.append( (pr_name, (pr_ver, pr_loc, "according to pkg_resources")) )

        cross_check_errors = _cross_check(pkg_resources_vers_and_locs, packages)
        packages += extra_packages

    return packages, cross_check_errors
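A minimal sketch of the idea stated in the comments above — import a package to read the version it actually reports, then compare with what pkg_resources believes; 'setuptools' is only an illustrative package name:

import importlib
import pkg_resources

def imported_version(modulename):
    """Version reported by the imported module itself, or 'unknown'."""
    module = importlib.import_module(modulename)
    return getattr(module, '__version__', 'unknown')

# What pkg_resources put on sys.path may differ from what actually gets imported.
dist = pkg_resources.get_distribution('setuptools')
print(imported_version('setuptools'), dist.version, dist.location)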
Ejemplo n.º 48
0
def model_resquiggle(f5_dirs1,
                     corr_group,
                     bc_subgrps,
                     tb_model_fn,
                     bio_samp_type,
                     z_trans_lag,
                     p_value_thresh,
                     reg_context,
                     base_reg_context,
                     max_base_shift,
                     b_max_base_shift,
                     min_obs_per_base,
                     base_space_iters,
                     compute_sd,
                     new_corr_grp,
                     num_processes,
                     overwrite,
                     in_place=True):
    z_thresh = ts.p_value_to_z_score(p_value_thresh)
    raw_read_coverage = th.parse_fast5s(f5_dirs1, corr_group, bc_subgrps,
                                        new_corr_grp)

    if tb_model_fn is None:
        tb_model_fn, bio_samp_type = ts.get_default_standard_ref_from_files(
            fast5_fns, bio_samp_type)

    # load reads into Queue
    manager = mp.Manager()
    reads_q = manager.Queue()
    failed_reads_q = manager.Queue()

    # group reads by filename so slot is not deleted in 2D reads
    fn_grouped_reads = defaultdict(list)
    for cs_reads in raw_read_coverage.values():
        for r_data in cs_reads:
            fn_grouped_reads[r_data.fn].append(r_data)
    num_reads = 0
    for fn_reads in fn_grouped_reads.values():
        reads_q.put(fn_reads)
        num_reads += 1

    mod_rsqgl_args = (reads_q, failed_reads_q, tb_model_fn, z_trans_lag,
                      z_thresh, reg_context, base_reg_context, max_base_shift,
                      b_max_base_shift, min_obs_per_base, base_space_iters,
                      new_corr_grp, compute_sd, overwrite, in_place,
                      corr_group)
    mod_rsqgl_ps = []
    for p_id in range(num_processes):
        p = mp.Process(target=model_resquiggle_worker, args=mod_rsqgl_args)
        p.start()
        mod_rsqgl_ps.append(p)

    if VERBOSE:
        sys.stderr.write('Correcting ' + unicode(num_reads) + ' files with ' +
                         unicode(len(bc_subgrps)) + ' subgroup(s)/read(s) ' +
                         'each (Will print a dot for each ' +
                         unicode(PROGRESS_INTERVAL) + ' reads completed).\n')
    failed_reads = defaultdict(list)
    while any(p.is_alive() for p in mod_rsqgl_ps):
        try:
            errorType, fn = failed_reads_q.get(block=False)
            failed_reads[errorType].append(fn)
        except queue.Empty:
            sleep(1)
            continue
    while not failed_reads_q.empty():
        errorType, fn = failed_reads_q.get(block=False)
        failed_reads[errorType].append(fn)

    # print newline after read progress dots
    if VERBOSE: sys.stderr.write('\n')

    return dict(failed_reads)
Ejemplo n.º 49
0
def error(code, msg):
    return HTTPError(code, dumps(msg), **dict(response.headers))
Ejemplo n.º 50
0
def get_package_versions():
    return dict([(k, v) for k, (v, l, c) in _vers_and_locs_list])
Ejemplo n.º 51
0
def launch_dlcs_excel(sim_id, silent=False, verbose=False, pbs_turb=False,
                      runmethod=None, write_htc=True, zipchunks=False,
                      walltime='04:00:00', postpro_node=False, compress=False,
                      dlcs_dir='htc/DLCs', postpro_node_zipchunks=True,
                      wine_arch='win32', wine_prefix='.wine32', ppn=17,
                      m=[3,4,6,8,9,10,12], prelude='', linux=False):
    """
    Launch load cases defined in Excel files
    """

    iter_dict = dict()
    iter_dict['[empty]'] = [False]

    if postpro_node or postpro_node_zipchunks:
        pyenv = 'py36-wetb'
    else:
        pyenv = None

    # FIXME: THIS IS VERY MESSY, we have wine_prefix/arch and exesingle/chunks
    if linux:
        wine_arch = None
        wine_prefix = None
        prelude = 'module load mpi/openmpi_1.6.5_intelv14.0.0\n'

    # if linux:
    #     pyenv = 'py36-wetb'
    #     pyenv_cmd = 'source /home/python/miniconda3/bin/activate'
    #     exesingle = "{hawc2_exe:} {fname_htc:}"
    #     exechunks = "({winenumactl:} {hawc2_exe:} {fname_htc:}) "
    #     exechunks += "2>&1 | tee {fname_pbs_out:}"
    # else:
    #     pyenv = ''
    #     pyenv_cmd = 'source /home/ozgo/bin/activate_hawc2cfd.sh'
    #     exesingle = "time {hawc2_exe:} {fname_htc:}"
    #     exechunks = "(time numactl --physcpubind=$CPU_NR {hawc2_exe:} {fname_htc:}) "
    #     exechunks += "2>&1 | tee {fname_pbs_out:}"

    # see if a htc/DLCs dir exists
    # Load all DLC definitions and make some assumptions on tags that are not
    # defined
    if os.path.exists(dlcs_dir):
        opt_tags = dlcdefs.excel_stabcon(dlcs_dir, silent=silent,
                                         p_source=P_SOURCE)
    else:
        opt_tags = dlcdefs.excel_stabcon(os.path.join(P_SOURCE, 'htc'),
                                         silent=silent, p_source=P_SOURCE)

    if len(opt_tags) < 1:
        raise ValueError('There is not a single case defined. Make sure '
                         'the DLC spreadsheets are configured properly.')

    # add all the root files, except anything with *.zip
    f_ziproot = []
    for (dirpath, dirnames, fnames) in os.walk(P_SOURCE):
        # remove all zip files
        for i, fname in enumerate(fnames):
            if not fname.endswith('.zip'):
                f_ziproot.append(fname)
        break
    # and add those files
    for opt in opt_tags:
        opt['[zip_root_files]'] = f_ziproot

    if runmethod is None:
        runmethod = RUNMETHOD

    master = master_tags(sim_id, runmethod=runmethod, silent=silent,
                         verbose=verbose)
    master.tags['[sim_id]'] = sim_id
    master.tags['[walltime]'] = walltime
    master.output_dirs.append('[Case folder]')
    master.output_dirs.append('[Case id.]')

    # TODO: copy master and DLC exchange files to p_root too!!

    # all tags set in master_tags will be overwritten by the values set in
    # vartag_func(), iter_dict and opt_tags; vartag_func() has precedence
    # over iter_dict, which has precedence over opt_tags
    # (a toy illustration of this precedence follows after this function).
    # dlcdefs.vartag_excel_stabcon adds support for creating hydro files
    vartag_func = dlcdefs.vartag_excel_stabcon
    cases = sim.prepare_launch(iter_dict, opt_tags, master, vartag_func,
                               write_htc=write_htc, runmethod=runmethod,
                               copyback_turb=True, update_cases=False, msg='',
                               ignore_non_unique=False, run_only_new=False,
                               pbs_fname_appendix=False, short_job_names=False,
                               silent=silent, verbose=verbose, pyenv=pyenv,
                               m=[3,4,6,8,9,10,12], postpro_node=postpro_node,
                               exechunks=None, exesingle=None, prelude=prelude,
                               postpro_node_zipchunks=postpro_node_zipchunks,
                               wine_arch=wine_arch, wine_prefix=wine_prefix)

    if pbs_turb:
        # to avoid confusing HAWC2 simulations and Mann64 generator PBS files,
        # MannTurb64 places PBS launch scripts in a "pbs_in_turb" folder
        mann64 = sim.MannTurb64(silent=silent)
        mann64.walltime = '00:59:59'
        mann64.queue = 'workq'
        mann64.gen_pbs(cases)

    if zipchunks:
        # create chunks
        # sort so we have minimal copying turb files from mimer to node/scratch
        # note that walltime here is for running all cases assigned to the
        # respective nodes. It is not walltime per case.
        sorts_on = ['[DLC]', '[Windspeed]']
        create_chunks_htc_pbs(cases, sort_by_values=sorts_on, queue='workq',
                              ppn=ppn, nr_procs_series=3, walltime='09:00:00',
                              chunks_dir='zip-chunks-jess', compress=compress,
                              wine_arch=wine_arch, wine_prefix=wine_prefix,
                              prelude=prelude, ppn_pbs=20)

    df = sim.Cases(cases).cases2df()
    df.to_excel(os.path.join(POST_DIR, sim_id + '.xls'))
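A toy illustration of the tag precedence described in the comment above (all tag values are hypothetical): later updates win, so vartag_func() beats iter_dict, which beats opt_tags:

opt_tag = {'[walltime]': '01:00:00', '[sim_id]': 'demo_a0'}   # from the spreadsheet
iter_tag = {'[walltime]': '04:00:00'}                         # from iter_dict
vartag = {'[walltime]': '08:00:00'}                           # from vartag_func()

tags = {}
for source in (opt_tag, iter_tag, vartag):   # lowest to highest precedence
    tags.update(source)

assert tags['[walltime]'] == '08:00:00'
assert tags['[sim_id]'] == 'demo_a0'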
Ejemplo n.º 52
0
def hsic_lasso(X_in,
               Y_in,
               y_kernel,
               x_kernel='Gauss',
               n_jobs=-1,
               discarded=0,
               B=0,
               perms=1):
    """
    Input:
        X_in      input_data
        Y_in      target_data
        y_kernel  We employ the Gaussian kernel for inputs. For output kernels,
                  we use the Gaussian kernel for regression cases and
                  the delta kernel for classification problems.
    Output:
        X         matrix of size d x (n * B (or n) * perms)
        X_ty      vector of size d x 1
    """
    d, n = X_in.shape
    dy = Y_in.shape[0]

    # Centering matrix
    H = np.eye(B, dtype=np.float32) - 1 / B * np.ones(B, dtype=np.float32)
    # print("H shepe:",H.shape)
    lf = np.zeros((n * B * perms, 1), dtype=np.float32)
    index = np.arange(n)
    st = 0
    ed = B**2

    # Normalize data
    if x_kernel == 'Gauss':
        X_in = (X_in / (X_in.std(1)[:, None] + 10e-20)).astype(np.float32)
    if y_kernel == "Gauss":
        Y_in = (Y_in / (Y_in.std(1)[:, None] + 10e-20)).astype(np.float32)

    # Compute y kernel matrix
    for p in range(perms):
        np.random.seed(p)
        index = np.random.permutation(index)

        for i in range(0, n - discarded, B):
            j = min(n, i + B)

            if y_kernel == "Delta":
                if dy > 1:
                    raise RuntimeError(
                        "Delta kernel only supports 1 dimensional class labels."
                    )

                L = kernel_delta_norm(Y_in[:, index[i:j]], Y_in[:, index[i:j]])
            elif y_kernel == "Gauss":
                YY = Y_in[:, index[i:j]]
                print("YY shape:", YY.shape)
                L = kernel_gaussian(YY, YY, np.sqrt(dy))

            L = np.dot(H, np.dot(L, H))

            # Normalize HSIC tr(L*L) = 1
            L = L / np.linalg.norm(L, 'fro')

            lf[st:ed, 0] = L.flatten()  # lf: a matrix of shape (n**2) x 1
            st += B**2
            ed += B**2

    # Preparing design matrix for HSIC Lars
    result = Parallel(n_jobs=n_jobs)([
        delayed(compute_input_matrix)(X_in[k, :], k, B, n, discarded, perms,
                                      x_kernel) for k in range(d)
    ])
    result = dict(result)
    # print(result)
    X = np.array([result[k] for k in range(d)]).T
    print("X shape:", X.shape)
    X_ty = np.dot(X.T, lf)

    return X, X_ty
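A hedged usage sketch for the function above; it assumes the helpers it relies on (kernel_gaussian, kernel_delta_norm, compute_input_matrix) and joblib's Parallel/delayed are importable from the same module, and the shapes follow the docstring (features on rows, samples on columns):

import numpy as np

d, n, B = 10, 60, 20                  # features, samples, block size (illustrative)
X_in = np.random.randn(d, n)          # d x n input matrix
Y_in = np.random.randn(1, n)          # 1 x n regression target

# Gaussian output kernel for a regression problem, a single permutation
X_design, X_ty = hsic_lasso(X_in, Y_in, y_kernel='Gauss', B=B, perms=1)
# X_design: flattened, centred kernel blocks per feature; X_ty: d x 1 scores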
Ejemplo n.º 53
0
logger = logging.getLogger(__name__)

USER_HOME = os.path.expanduser("~")
PROJECT_PATH = os.path.dirname(os.path.dirname(__file__))

DATA_PATH = os.path.join(os.path.dirname(__file__), 'data')
DATA_INFO_FILE = os.path.join(DATA_PATH, 'data_info.csv')

BIGDATA_PATH = os.path.join(os.path.dirname(__file__), 'bigdata')
BIGDATA_INFO_FILE = os.path.join(DATA_PATH, 'bigdata_info.csv')
BIGDATA_INFO_LATEST = BIGDATA_INFO_FILE[:-4] + '.latest.csv'
touch_p(BIGDATA_INFO_FILE, times=False)
touch_p(BIGDATA_INFO_LATEST, times=False)

UTF8_TABLE = read_csv(os.path.join(DATA_PATH, 'utf8.csv'))
UTF8_TO_MULTIASCII = dict(zip(UTF8_TABLE.char, UTF8_TABLE.multiascii))
UTF8_TO_ASCII = dict(zip(UTF8_TABLE.char, UTF8_TABLE.ascii))

# rename secrets.cfg.EXAMPLE_TEMPLATE -> secrets.cfg then edit secrets.cfg to include your actual credentials
secrets = configparser.RawConfigParser()
try:
    secrets.read(os.path.join(PROJECT_PATH, 'secrets.cfg'))
    secrets = secrets._sections
except IOError:
    logger.error(
        'Unable to load/parse secrets.cfg file at "{}". Does it exist?'.format(
            os.path.join(PROJECT_PATH, 'secrets.cfg')))
    secrets = {}

secrets = dict2obj(secrets)
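The loader above expects a plain INI-style file in the project root; a hypothetical layout (section and key names are illustrative, not taken from the original project) could look like this:

# secrets.cfg (illustrative content only)
#
#   [twitter]
#   consumer_key = YOUR_CONSUMER_KEY
#   consumer_secret = YOUR_CONSUMER_SECRET
#
# RawConfigParser then exposes it via secrets._sections roughly as
# {'twitter': {'consumer_key': 'YOUR_CONSUMER_KEY', ...}} before dict2obj wraps it.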
Ejemplo n.º 54
0
import blobxfer.api
import blobxfer.util
# local imports
try:
    import cli.settings as settings
except (SystemError, ImportError):  # noqa
    try:
        from . import settings
    except (SystemError, ImportError):  # noqa
        # for local testing
        import settings

# create logger
logger = logging.getLogger('blobxfer')
# global defines
_CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])


class CliContext(object):
    """CliContext class: holds context for CLI commands"""
    def __init__(self):
        """Ctor for CliContext"""
        self.config = None
        self.cli_options = {}
        self.credentials = None
        self.general_options = None
        self.show_config = False

    def initialize(self, action):
        # type: (CliContext, settings.TransferAction) -> None
        """Initialize context
Ejemplo n.º 55
0
    def __init__(self, *args, **kw):
        # Requires `from collections import deque` in the enclosing module.
        super().__init__(*args, **kw)
        self._clients = deque()
        self._channels = dict()
Ejemplo n.º 56
0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""memote command line interface."""

from __future__ import absolute_import

import logging
from builtins import dict

import click

from memote.suite.cli.config import ConfigFileProcessor

LOGGER = logging.getLogger(__name__)

try:
    CONTEXT_SETTINGS = dict(default_map=ConfigFileProcessor.read_config())
except click.BadParameter as err:
    LOGGER.error("Error in configuration file: {}\nAll configured values will "
                 "be ignored!".format(str(err)))
    CONTEXT_SETTINGS = dict()
Ejemplo n.º 57
0
def insert_part_fields_into_sch(part_fields_dict, filename, recurse, group_components, backup):
    '''Insert the fields in the extracted part dictionary into a schematic.'''

    logger.log(
        DEBUG_OVERVIEW,
        'Inserting extracted fields into schematic file {}.'.format(filename))

    if backup:
        create_backup(filename)

    def reorder_sch_fields(fields):
        '''Return the part fields with the named fields ordered alphabetically.'''
        # Sort the named fields that come after the first four, unnamed fields.
        sort_key = operator.itemgetter('name')
        if USING_PYTHON2:
            sort_key_func = lambda s: unicode(sort_key(s))
        else:
            sort_key_func = sort_key
        named_fields = sorted(fields[4:], key=sort_key_func)
        # Renumber the ids of the sorted fields.
        for id, field in enumerate(named_fields, 4):
            field['id'] = str(id)
        # Return the first four fields plus the remaining sorted fields.
        return fields[:4] + named_fields

    # Get an existing schematic or abort. (There's no way we can create
    # a viable schematic file just from part field values.)
    try:
        sch = Schematic(filename)
    except IOError:
        logger.warn('Schematic file {} not found.'.format(filename))
        return

    # Go through all the schematic components, replacing field values and
    # adding new fields found in the part fields dictionary.
    for component in sch.components:

        prev_part_fields = None

        # For each reference for this component, search in the dictionary
        # for new or updated fields for this part.
        refs = get_component_refs(component)
        for ref in refs:

            # Get the part fields for the given part reference (or an empty list).
            part_fields = part_fields_dict.get(ref, {})

            # Warn if the current part fields for this component don't match the
            # previous part fields (which may happen with hierarchical schematics).
            if prev_part_fields is not None and part_fields != prev_part_fields:
                logger.warn(
                    "The inserted part lists for hierarchically-instantiated components {} have different values.".format(
                        refs))
            # Store the part fields for later comparison.
            prev_part_fields = deepcopy(part_fields)

            # Insert the fields from the part dictionary into the component fields.
            for field_name, field_value in part_fields.items():

                # Create a dict to hold the field visibility attribute.
                try:
                    field_attributes = dict()
                    INVIS_PREFIX = '[I]'
                    VISIBLE_PREFIX = '[V]'
                    INVIS_CODE = '0001'
                    VISIBLE_CODE = '0000'
                    if field_name.startswith(INVIS_PREFIX):
                        field_attributes['attributes'] = INVIS_CODE
                        field_name = field_name[len(INVIS_PREFIX):]
                    elif field_name.startswith(VISIBLE_PREFIX):
                        field_attributes['attributes'] = VISIBLE_CODE
                        field_name = field_name[len(VISIBLE_PREFIX):]
                    if field_value.startswith(INVIS_PREFIX):
                        field_attributes['attributes'] = INVIS_CODE
                        field_value = field_value[len(INVIS_PREFIX):]
                    elif field_value.startswith(VISIBLE_PREFIX):
                        field_attributes['attributes'] = VISIBLE_CODE
                        field_value = field_value[len(VISIBLE_PREFIX):]
                except AttributeError:
                    # If we get here, it's probably because field_value is not a
                    # string so the startswith() method wasn't found. Because it's
                    # not a string, there's no way for it to have a prefix string
                    # so we can just ignore the exception because the action never
                    # would have happened anyway.
                    pass

                # Also store a position for a new field based on the REF position.
                posx = component.fields[0]['posx']
                posy = str(int(component.fields[0]['posy']) + 100) # Place it below REF.
                field_position = {'posx':posx, 'posy':posy}

                # Get the field id associated with this field name (if there is one).
                field_id = lib_field_name_to_id.get(field_name, None)

                # Search for an existing field with a matching name in the component.
                for f in component.fields:

                    if unquote(f['name']).lower() == field_name.lower():
                        # Update existing named field in component.
                        logger.log(DEBUG_OBSESSIVE,
                                   'Updating {} field {} from {} to {}'.format(
                                       ref, f['id'], f['ref'], quote(field_value)))
                        f['ref'] = quote(field_value)
                        # Set field attributes but don't change its position.
                        if 'attributes' in field_attributes:
                            f['attributes'] = field_attributes['attributes']
                        break

                    elif f['id'] == field_id:
                        # Update one of the default, unnamed fields in component.
                        logger.log(DEBUG_OBSESSIVE,
                                   'Updating {} field {} from {} to {}'.format(
                                       ref, f['id'], f['ref'], quote(field_value)))
                        f['ref'] = quote(field_value)
                        # Set field attributes but don't change its position.
                        if 'attributes' in field_attributes:
                            f['attributes'] = field_attributes['attributes']
                        break

                # No existing field to update, so add a new field.
                else:
                    if field_value not in (None, ''):
                        # Add new named field and value to component.
                        new_field = {'ref': quote(field_value),
                                     'name': quote(field_name)}
                        new_field.update(field_attributes) # Set field's attributes.
                        new_field.update(field_position) # Set new field's position.
                        component.addField(new_field)
                        logger.log(DEBUG_OBSESSIVE,
                                   'Adding {} field {} with value {}'.format(
                                       ref, component.fields[-1]['id'],
                                       quote(field_value)))

                # Remove any named fields with empty values.
                component.fields = [
                    f
                    for f in component.fields
                    if unquote(f.get('name', None)) in (None, '', '~') or
                    unquote(f.get('ref', None)) not in (None, '')
                ]

                # Canonically order the fields to make schematic comparisons
                # easier during acceptance testing.
                # component.fields = reorder_sch_fields(component.fields)

    # Save the updated schematic.
    sch.save(filename)

    # If this schematic references other schematic sheets, then insert the part fields into those, too.
    if recurse:
        for sheet in sch.sheets:
            # If filename includes a path, save this path to prepend below
            if filename.count('/') > 0:
                prepend_dir = filename.rsplit('/', 1)[0] + '/'
            else:
                prepend_dir = './'
            for field in sheet.fields:
                if field['id'] == 'F1':
                    # Prepend path for sheets which are nested more than once
                    sheet_file = prepend_dir + unquote(field['value'])
                    insert_part_fields_into_sch(part_fields_dict, sheet_file, recurse, group_components, backup)
                    break
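For reference, a short note (values are hypothetical) on how the '[I]'/'[V]' prefixes handled inside the loop above translate into field attributes:

#   spreadsheet value '[I]10k'   -> attributes '0001' (invisible), stored value '10k'
#   spreadsheet value '[V]0.1uF' -> attributes '0000' (visible),   stored value '0.1uF'
#   a value with no prefix leaves the field's visibility attribute untouched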
Ejemplo n.º 58
0
ABBREVIATIONS = [
    "a.C.", "all.", "apr.", "art.", "artt.", "b.c.", "c.a.", "cfr.", "c.d.",
    "c.m.", "C.V.", "d.C.", "Dott.", "ecc.", "egr.", "e.v.", "fam.", "giu.",
    "Ing.", "L.", "n.", "op.", "orch.", "p.es.", "Prof.", "prof.", "ql.co.",
    "secc.", "sig.", "s.l.m.", "s.r.l.", "Spett.", "S.P.Q.C.", "v.c."
]

replacements = ("a", "co", "all", "anch", "nient", "cinquant", "b", "de",
                "dev", "bell", "quell", "diciott", "c", "gl", "don", "cent",
                "quest", "occupo", "d", "po", "dov", "dall", "trent",
                "sessant", "l", "un", "nel", "dell", "tropp", "m", "king", "n",
                "nell", "r", "sant", "s", "sott", "sull", "tant", "tutt",
                "vent")

replacements += tuple(k.capitalize() for k in replacements)
replacements = dict((k + "'", k + "' ") for k in replacements)
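# Illustration only (not in the original source): each elided form is mapped to
# itself plus a trailing space, so apostrophe-joined tokens can be split, e.g.
#   replacements["dell'"] == "dell' "
#   replacements["Quest'"] == "Quest' "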


def find_lemmata(tokens):
    """ Annotates the tokens with lemmata for plural nouns and conjugated verbs,
        where each token is a [word, part-of-speech] list.
    """
    for token in tokens:
        word, pos, lemma = token[0], token[1], token[0]
        if pos.startswith(("DT", )):
            lemma = singularize(word, pos="DT")
        if pos.startswith("JJ"):
            lemma = predicative(word)
        if pos == "NNS":
            lemma = singularize(word)
        if pos.startswith(("VB", "MD")):
Ejemplo n.º 59
0
# -*- coding: utf-8 -*-
''' Conversion helpers for the Thai numerals 1-10.
Forked from http://justmindthought.blogspot.com/2012/12/code-php.html
'''
from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import dict
from builtins import int
import math, six, ast
p = [[u'ภาษาไทย', u'ตัวเลข', u'เลขไทย'], [u'หนึ่ง', u'1', u'๑'],
     [u'สอง', u'2', u'๒'], [u'สาม', u'3', u'๓'], [u'สี่', u'4', u'๔'],
     [u'ห้า', u'5', u'๕'], [u'หก', u'6', u'๖'], [u'เจ็ด', u'7', u'๗'],
     [u'แปด', u'8', u'๘'], [u'เก้า', u'9', u'๙']]
thaitonum = dict((x[2], x[1]) for x in p[1:])
p1 = dict((x[0], x[1]) for x in p[1:])
d1 = 0


# Thai numeral to Arabic numeral
def thai_num_to_num(text):
    """Take a Thai-numeral ''str'' and return the corresponding Arabic numeral as ''str''."""
    thaitonum = dict((x[2], x[1]) for x in p[1:])
    return thaitonum[text]


# Thai numeral to spelled-out Thai text
def thai_num_to_text(text):
    """Take a Thai-numeral ''str'' and return the corresponding spelled-out Thai word as ''str''."""
    thaitonum = dict((x[2], x[0]) for x in p[1:])
    return thaitonum[text]
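A minimal usage sketch for the two helpers above:

print(thai_num_to_num(u'๕'))    # -> u'5'
print(thai_num_to_text(u'๕'))   # -> u'ห้า' ("five")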

Ejemplo n.º 60
0
    def initialize_save(self,
                        output_path,
                        run_name="",
                        run_message="",
                        output_folders=None,
                        del_existing_folders=False,
                        logger_cfg=None):
        """Initialize the run. Create the necessary folders.

        Parameters
        ----------
        Best Practice: enter a whole config file

        output_path : str
            Absolute path to the folder where the run output folder will be
            created (named after the run) which will contain all the output
            folders (logfile, figures, output etc)
        run_name : str
            The name of the run and also of the output folder.
        run_message : str
            A message that is displayed below the title: a further comment
            on what you do in the script
        output_folders : dict
            Contain the name of the folders for the different outputs. For the
            available keys
            see :py:const:`~raredecay.meta_config.__DEFAULT_OUTPUT_FOLDERS`.
        del_existing_folders : boolean
            If True, an already existing folder with the same name will be deleted.
            If False and the folder exists already, an exception will be raised.
        logger_cfg : dict
            The configuration for the logger, which will be created later. If
            not specified (or only a few arguments), the meta_config will be
            taken.
        """
        output_path = dev_tool.entries_to_str(output_path)
        run_name = dev_tool.entries_to_str(run_name)
        run_message = dev_tool.entries_to_str(run_message)
        output_folders = dev_tool.entries_to_str(output_folders)
        logger_cfg = dev_tool.entries_to_str(logger_cfg)

        self._save_output = True
        # initialize defaults
        logger_cfg = {} if logger_cfg is None else logger_cfg
        self._logger_cfg = dict(meta_cfg.DEFAULT_LOGGER_CFG, **logger_cfg)

        assert isinstance(output_path, basestring), "output_path not a string"
        output_folders = {} if output_folders is None else output_folders
        self._output_folders = dict(meta_cfg.DEFAULT_OUTPUT_FOLDERS,
                                    **output_folders)

        # make sure no blank spaces are left in the folder names
        for key, value in list(self._output_folders.items()):
            assert isinstance(
                value, basestring), "path is not a string: " + str(value)
            self._output_folders[key] = value.replace(" ", "_")

        # ask if you want to add something to the run_name (and folder name)
        if meta_cfg.PROMPT_FOR_COMMENT:
            prompt_message = "Enter an (optional) extension to the run-name and press 'enter':\n"
            temp_add = str(input(prompt_message))
            run_name += " " + temp_add if temp_add != "" else ""
            # del temp_add
            # TODO: implement prompt with timeout
        self._run_name = run_name

        # "clean" and correct the path-name
        for char in self._REPLACE_CHAR:
            run_name = run_name.replace(char, "_")
        output_path += run_name if output_path.endswith(
            '/') else '/' + run_name
        self._output_path = os.path.expanduser(
            output_path)  # replaces ~ with /home/myUser

        # find a non-existing folder
        temp_i = 1
        while os.path.isdir(self._output_path):
            if del_existing_folders:
                self._path_to_be_overriden = output_path
                if not self._path_to_be_overriden.endswith('/'):
                    self._path_to_be_overriden += '/'
            self._output_path = output_path + "_" + str(temp_i)
            temp_i += 1
            assert temp_i < meta_cfg.MAX_AUTO_FOLDERS, \
                "possible endless loop when trying to create a non-existing folder"
        self._output_path += '' if output_path.endswith('/') else '/'

        # create subfolders
        for value in list(self._output_folders.values()):
            subprocess.call(['mkdir', '-p', self._output_path + value])
        subprocess.call(['touch', self._output_path + 'run_NOT_finished'
                         ])  # marker file: the run is still ongoing

        # set meta-config variables
        meta_cfg.set_parallel_profile(n_cpu=meta_cfg.n_cpu_max,
                                      gpu_in_use=meta_cfg.use_gpu)

        self._is_initialized = True
        self.add_output(run_message,
                        title="Run: " + self._run_name,
                        importance=0,
                        subtitle="Comments about the run")