Example #1
File: cpu.py Project: cea-sec/miasm
    def guess_mnemo(cls, bs, attrib, pre_dis_info, offset):
        candidates = set()

        fname_values = pre_dis_info
        todo = [
            (dict(fname_values), branch, offset * 8)
            for branch in list(viewitems(cls.bintree))
        ]
        for fname_values, branch, offset_b in todo:
            (l, fmask, fbits, fname, flen), vals = branch

            if flen is not None:
                l = flen(attrib, fname_values)
            if l is not None:
                try:
                    v = cls.getbits(bs, attrib, offset_b, l)
                except IOError:
                    # Raised if offset is out of bound
                    continue
                offset_b += l
                if v & fmask != fbits:
                    continue
                if fname is not None and fname not in fname_values:
                    fname_values[fname] = v
            for nb, v in viewitems(vals):
                if 'mn' in nb:
                    candidates.update(v)
                else:
                    todo.append((dict(fname_values), (nb, v), offset_b))

        return [c for c in candidates]
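None of the snippets in this listing shows the import behind viewitems; in these projects it generally comes from the python-future compatibility layer (six ships an equivalent helper). A minimal sketch of what the call does:

from future.utils import viewitems

d = {"a": 1, "b": 2}
# On Python 2 this is d.viewitems(), on Python 3 it is d.items():
# a dynamic view over the dict rather than a copied list.
for key, value in viewitems(d):
    print(key, value)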
Example #2
    def dropped_samples(self):
        """The samples that were selected but dropped in processing

        Returns
        -------
        dict of sets
            Format is {artifact_id: {sample_id, sample_id, ...}, ...}
        """
        with qdb.sql_connection.TRN:
            bioms = self.biom_tables
            if not bioms:
                return {}

            # get all samples selected for the analysis, converting lists to
            # sets for fast searching; this keeps the overhead low for
            # large analyses
            all_samples = {k: set(v) for k, v in viewitems(self.samples)}

            for biom, filepath in viewitems(bioms):
                table = load_table(filepath)
                ids = set(table.ids())
                for k in all_samples:
                    all_samples[k] = all_samples[k] - ids

            # what's left are unprocessed samples, so return
            return all_samples
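A toy illustration of the set-difference step above; the artifact ids and sample ids are made up:

all_samples = {4: {"s1", "s2", "s3"}, 5: {"s4"}}  # hypothetical selection
table_ids = {"s1", "s3"}                          # ids found in one BIOM table
for artifact_id in all_samples:
    all_samples[artifact_id] = all_samples[artifact_id] - table_ids
print(all_samples)  # {4: {'s2'}, 5: {'s4'}}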
Example #3
    def _redundantFree(self, blocks):
        """
        Redundant-free Comparisons from Kolb et al, "Dedoop:
        Efficient Deduplication with Hadoop"
        http://dbs.uni-leipzig.de/file/Dedoop.pdf
        """
        coverage = defaultdict(list)

        for block_id, records in enumerate(blocks):

            for record_id, record in viewitems(records):
                coverage[record_id].append(block_id)

        for block_id, records in enumerate(blocks):
            if block_id % 10000 == 0:
                logger.info("%s blocks" % block_id)

            marked_records = []
            for record_id, record in viewitems(records):
                smaller_ids = {covered_id for covered_id
                               in coverage[record_id]
                               if covered_id < block_id}
                marked_records.append((record_id, record, smaller_ids))

            yield marked_records
Example #4
File: cpu.py Project: cea-sec/miasm
def factor_one_bit(tree):
    if isinstance(tree, set):
        return tree
    new_keys = defaultdict(lambda: defaultdict(dict))
    if len(tree) == 1:
        return tree
    for k, v in viewitems(tree):
        if k == "mn":
            new_keys[k] = v
            continue
        l, fmask, fbits, fname, flen = k
        if flen is not None or l <= 1:
            new_keys[k] = v
            continue
        cfmask = fmask >> (l - 1)
        nfmask = fmask & ((1 << (l - 1)) - 1)
        cfbits = fbits >> (l - 1)
        nfbits = fbits & ((1 << (l - 1)) - 1)
        ck = 1, cfmask, cfbits, None, flen
        nk = l - 1, nfmask, nfbits, fname, flen
        if nk in new_keys[ck]:
            raise NotImplementedError('not fully functional')
        new_keys[ck][nk] = v
    for k, v in list(viewitems(new_keys)):
        new_keys[k] = factor_one_bit(v)
    # try factor sons
    if len(new_keys) != 1:
        return new_keys
    subtree = next(iter(viewvalues(new_keys)))
    if len(subtree) != 1:
        return new_keys
    if next(iter(subtree)) == 'mn':
        return new_keys

    return new_keys
Example #5
    def dropped_samples(self):
        """The samples that were selected but dropped in processing

        Returns
        -------
        dict of sets
            Format is {processed_data_id: {sample_id, sample_id, ...}, ...}
        """
        bioms = self.biom_tables
        if not bioms:
            return {}

        # get all samples selected for the analysis, converting lists to
        # sets for fast searching; this keeps the overhead low for large analyses
        all_samples = {k: set(v) for k, v in viewitems(self.samples)}

        for biom, filepath in viewitems(bioms):
            table = load_table(filepath)
            # remove the samples from the sets as they are found in the table
            proc_data_ids = set(sample['Processed_id']
                                for sample in table.metadata())
            ids = set(table.ids())
            for proc_data_id in proc_data_ids:
                all_samples[proc_data_id] = all_samples[proc_data_id] - ids

        # what's left are unprocessed samples, so return
        return all_samples
Example #6
    def post(self):
        barcodes = self.get_argument('barcodes').split(',')
        if self.get_argument('blanks'):
            blanks = self.get_argument('blanks').split(',')
        else:
            blanks = []
        if self.get_argument('external'):
            external = self.get_argument('external').split(',')
        else:
            external = []
        # Get metadata and create zip file
        metadata, failures = db.pulldown(barcodes, blanks, external)

        meta_zip = InMemoryZip()
        failed = '\n'.join(['\t'.join(bc) for bc in viewitems(failures)])
        failtext = ("The following barcodes were not retrieved "
                    "for any survey:\n%s" % failed)
        meta_zip.append("failures.txt", failtext)
        for survey, meta in viewitems(metadata):
            meta_zip.append('survey_%s_md.txt' % survey, meta)

        # write out zip file
        self.add_header('Content-type',  'application/octet-stream')
        self.add_header('Content-Transfer-Encoding', 'binary')
        self.add_header('Accept-Ranges', 'bytes')
        self.add_header('Content-Encoding', 'none')
        self.add_header('Content-Disposition',
                        'attachment; filename=metadata.zip')
        self.write(meta_zip.write_to_buffer())
        self.flush()
        self.finish()
Example #7
def to_object(data, field_maps, cls=None, wrap=True, dc=None):
    """Does two things:
    1. Converts the data to the common convention,
    2. Wraps the converted data in the provided class
       and returns an instance of that class

    :param dc: datacenter name to be set as an attribute on all objects
    :param data: this data should be either a list of dicts or dict itself
    :param field_maps: a dict of mapping values in form of {mapping: original}
    :param cls: Class to which the data to be wrapped
    :param wrap: Whether to wrap the data in the class or not
    :return: instance of cls
    """
    if isinstance(data, list):
        new_data = []
        for instance in data:
            if dc:
                instance.update({"datacenter": dc})
                field_maps.update({"datacenter": "datacenter"})
            new_data.append({
                mapping: instance.get(orig, None) for mapping, orig in viewitems(field_maps)
            })
        if wrap:
            return [cls(**instance) for instance in new_data]
        else:
            return new_data
    elif isinstance(data, dict):
        if dc:
            data.update({"datacenter": dc})
            field_maps.update({"datacenter": "datacenter"})
        new_data = {mapping: data.get(orig, None) for mapping, orig in viewitems(field_maps)}
        if wrap:
            return cls(**new_data)
        else:
            return new_data
Example #8
def rollout(env, agent, timestep_limit):
    """
    Simulate the env and agent for timestep_limit steps
    """
    ob = env.reset()
    terminated = False

    data = defaultdict(list)
    for _ in range(timestep_limit):
        ob = agent.obfilt(ob)
        data["observation"].append(ob)
        action, agentinfo = agent.act(ob)
        data["action"].append(action)
        for (k,v) in viewitems(agentinfo):
            data[k].append(v)
        ob,rew,done,envinfo = env.step(action)
        data["reward"].append(rew)
        rew = agent.rewfilt(rew)
        data["reward_filt"].append(rew)
        for (k,v) in viewitems(envinfo):
            data[k].append(v)
        if done:
            terminated = True
            break
    data = {k:np.array(v) for (k,v) in viewitems(data)}
    data["terminated"] = terminated
    return data
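The last two lines above follow a common trajectory-buffer pattern: accumulate per-step values in lists, then convert every list to a NumPy array once the episode ends. A standalone sketch with dummy data:

import numpy as np

data = {"reward": [1.0, 0.5, -0.2], "action": [0, 1, 1]}  # dummy per-step lists
data = {k: np.array(v) for k, v in data.items()}
print(data["reward"].shape)  # (3,)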
Example #9
def getChainsFromConnections(connections,checkConnections=True):
    '''Take a list of connections and return a list of connection chains
       connections is a dictionary of connections between elements (which must be hashable)
         and can be generated using getElementConnections
       The checkConnections option tests that there is only one path
         through each point (aka 2 or fewer connections, no branching)
       Returns a list of chains (lists of elements)
       '''
    connections = deepcopy(connections) # Protect the input from modification
    if checkConnections: # Check that there is no branching
        assert all( len(v)<3 for k,v in viewitems(connections) ), 'Aborting; this network has branching'

    chains = []
    while len(connections): # loop over possible chains
        # Pick a starting point (an end point if possible)
        currPt = _firstOrOther([pt for pt,conn in viewitems(connections)
                                   if len(conn)==1],
                               next(iter(connections))) # was connections.keys()[0]
        # Form a chain and move the current point forward
        chain = [currPt]
        currPt = connections.pop(currPt)[0]
        while currPt: # loop to fill a chain, stop on an invalid
            chain.append(currPt)
            if len(connections)==0:
                break
            connections[currPt] = deletecases(connections[currPt], [chain[-2]])
            currPt = _firstOrOther(connections.pop(currPt,[]))
        chains.append(chain)
    return chains
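Since the helpers _firstOrOther and deletecases are not shown here, a simplified, self-contained re-implementation of the same idea may be easier to follow; it assumes connections is an undirected adjacency dict with no branching:

def chains_from_connections(connections):
    # copy so the caller's dict is not modified
    conns = {k: list(v) for k, v in connections.items()}
    chains = []
    while conns:
        # prefer an end point (a node with a single connection) as the start
        start = next((p for p, nbrs in conns.items() if len(nbrs) == 1),
                     next(iter(conns)))
        chain = [start]
        nbrs = conns.pop(start)
        current = nbrs[0] if nbrs else None
        while current is not None and current in conns:
            chain.append(current)
            nbrs = [n for n in conns.pop(current) if n != chain[-2]]
            current = nbrs[0] if nbrs else None
        chains.append(chain)
    return chains

print(chains_from_connections({"a": ["b"], "b": ["a", "c"], "c": ["b"]}))
# [['a', 'b', 'c']]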
Example #10
  def setUp(self) :
    data_d = {
      100 : {"name": "Bob", "age": "50", "dataset": 0},
      105 : {"name": "Charlie", "age": "75", "dataset": 1},
      110 : {"name": "Meredith", "age": "40", "dataset": 1},
      115 : {"name": "Sue", "age": "10", "dataset": 0},
      120 : {"name": "Jimbo", "age": "21","dataset": 0},
      125 : {"name": "Jimbo", "age": "21", "dataset": 0},
      130 : {"name": "Willy", "age": "35", "dataset": 0},
      135 : {"name": "Willy", "age": "35", "dataset": 1},
      140 : {"name": "Martha", "age": "19", "dataset": 1},
      145 : {"name": "Kyle", "age": "27", "dataset": 0},
    }


    self.blocker = dedupe.blocking.Blocker([dedupe.predicates.TfidfTextSearchPredicate(0.0, "name")])

    self.records_1 = dict((record_id, record) 
                          for record_id, record 
                          in viewitems(data_d)
                          if record["dataset"] == 0)

    self.fields_2 = dict((record_id, record["name"])
                         for record_id, record 
                         in viewitems(data_d)
                         if record["dataset"] == 1)
Example #11
File: sem.py Project: cea-sec/miasm
def compute(asm, inputstate={}, debug=False):
    loc_db = LocationDB()
    sympool = dict(regs_init)
    sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)})
    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)
    instr = mn.fromstring(asm, loc_db, "l")
    code = mn.asm(instr)[0]
    instr = mn.dis(code, "l")
    instr.offset = inputstate.get(PC, 0)
    lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    out = {}
    for k, v in viewitems(symexec.symbols):
        if k in EXCLUDE_REGS:
            continue
        elif regs_init.get(k, None) == v:
            continue
        elif isinstance(v, ExprInt):
            out[k] = int(v)
        else:
            out[k] = v
    return out
Example #12
    def coveredPairs(self, blocker, records_1, records_2):
        cover = {}

        pair_enumerator = core.Enumerator()

        for predicate in blocker.predicates:
            cover[predicate] = collections.defaultdict(lambda: (set(), set()))
            for id, record in viewitems(records_2):
                blocks = predicate(record, target=True)
                for block in blocks:
                    cover[predicate][block][1].add(id)

            current_blocks = set(cover[predicate])
            for id, record in viewitems(records_1):
                blocks = set(predicate(record))
                for block in blocks & current_blocks:
                    cover[predicate][block][0].add(id)

        for predicate, blocks in cover.items():
            pairs = {pair_enumerator[pair]
                     for A, B in blocks.values()
                     for pair in itertools.product(A, B)}
            cover[predicate] = Counter(pairs)

        return cover
Example #13
File: dse.py Project: cea-sec/miasm
    def restore_snapshot(self, snapshot, memory=True):
        """Restore a @snapshot taken with .take_snapshot
        @snapshot: .take_snapshot output
        @memory: (optional) if set, also restore the memory
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in viewitems(snapshot["mem"]):
                self.jitter.vm.add_memory_page(
                    addr,
                    metadata["access"],
                    metadata["data"]
                )

        # Restore registers
        self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name]
        for reg, value in viewitems(snapshot["regs"]):
            setattr(self.jitter.cpu, reg, value)

        # Reset internal elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False

        # Reset symb exec
        for key, _ in list(viewitems(self.symb.symbols)):
            del self.symb.symbols[key]
        for expr, value in viewitems(snapshot["symb"]):
            self.symb.symbols[expr] = value
Example #14
File: cpu.py Project: cea-sec/miasm
def factor_fields(tree):
    if not isinstance(tree, dict):
        return tree
    if len(tree) != 1:
        return tree
    # merge
    k1, v1 = next(iter(viewitems(tree)))
    if k1 == "mn":
        return tree
    l1, fmask1, fbits1, fname1, flen1 = k1
    if fname1 is not None:
        return tree
    if flen1 is not None:
        return tree

    if not isinstance(v1, dict):
        return tree
    if len(v1) != 1:
        return tree
    k2, v2 = next(iter(viewitems(v1)))
    if k2 == "mn":
        return tree
    l2, fmask2, fbits2, fname2, flen2 = k2
    if fname2 is not None:
        return tree
    if flen2 is not None:
        return tree
    l = l1 + l2
    fmask = (fmask1 << l2) | fmask2
    fbits = (fbits1 << l2) | fbits2
    fname = fname2
    flen = flen2
    k = l, fmask, fbits, fname, flen
    new_keys = {k: v2}
    return new_keys
Example #15
    def write_script_rule(self, inputs, outputs, parameters, shell_template, rule_name):
        assert '_bash_' not in parameters
        first_output_name, first_output_fn = next(iter(viewitems(outputs))) # for rundir, since we cannot sub wildcards in shell
        if not rule_name:
            rule_name = os.path.dirname(first_output_fn)
        rule_name = self.unique_rule_name(self.legalize(rule_name))
        wildcard_rundir = os.path.normpath(os.path.dirname(first_output_fn)) # unsubstituted
        # We use snake_string_path b/c normpath drops leading ./, but we do NOT want abspath.
        input_kvs = ', '.join('%s=%s'%(k, snake_string_path(v)) for k,v in
                sorted(viewitems(inputs)))
        output_kvs = ', '.join('%s=%s'%(k, snake_string_path(v)) for k,v in
                sorted(viewitems(outputs)))
        rule_parameters = {k: v for (k, v) in viewitems(parameters) if not k.startswith('_')}
        #rule_parameters['reltopdir'] = os.path.relpath('.', wildcard_rundir) # in case we need this later
        params = ','.join('\n        %s="%s"'%(k,v) for (k, v) in viewitems(rule_parameters))
        shell = snake_shell(shell_template, wildcard_rundir)
        # cd $(dirname '{output.%(first_output_name)s}')
        rule = """
rule static_%(rule_name)s:
    input:  %(input_kvs)s
    output: %(output_kvs)s
    params:%(params)s
    shell:
        '''
outdir=$(dirname {output[0]})
#mkdir -p ${{outdir}}
cd ${{outdir}}
date

%(shell)s
date
'''
"""%(locals())
        self.write(rule)
Example #16
    def comparisons(self, cover, compound_length) :
        CP = predicates.CompoundPredicate

        block_index = {}
        for predicate, blocks in viewitems(cover):
            block_index[predicate] = {}
            for block_id, block in viewitems(blocks) :
                for id in self._blocks(block) :
                    block_index[predicate].setdefault(id, set()).add(block_id)

        compounder = self.Compounder(cover, block_index)
        comparison_count = {}
        simple_predicates = sorted(cover, key=str)

        for i in range(2, compound_length+1) :
            for combo in itertools.combinations(simple_predicates, i) :
                comparison_count[CP(combo)] = sum(self.pairs(ids)
                                                  for ids in 
                                                  viewvalues(compounder(combo)))
        for pred in simple_predicates :
            comparison_count[pred] = sum(self.pairs(ids)
                                         for ids
                                         in viewvalues(cover[pred]))

        return comparison_count    
Example #17
    def calculate_repr(self):
        result = [
            "[{0}-{1}->{2}]".format(previous, count, nnext)
            for previous, edges in viewitems(self.edges)
            for nnext, count in viewitems(edges)
        ]

        self.repr = "G({0})".format(", ".join(result))
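With a hypothetical nested edge-count dict in the same shape as self.edges, the comprehension above produces output like this:

edges = {"read": {"parse": 2}, "parse": {"eval": 1}}  # hypothetical data
result = [
    "[{0}-{1}->{2}]".format(previous, count, nnext)
    for previous, inner in edges.items()
    for nnext, count in inner.items()
]
print("G({0})".format(", ".join(result)))
# G([read-2->parse], [parse-1->eval])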
Example #18
    def _filter(self, filters, data):

        for m, o in viewitems(self.field_map):
            if m in filters and o not in filters:
                filters[o] = filters.pop(m, None)

        return [i for i in data
                if all(i.get(k) == v
                       for k, v in viewitems(filters))]
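A standalone sketch of the same two steps; field_map, filters and data below are invented for illustration. First mapped keys are translated back to their original names, then only records matching every filter are kept:

field_map = {"name": "hostname"}  # mapped key -> original key
filters = {"name": "web01"}
data = [{"hostname": "web01", "port": 80},
        {"hostname": "db01", "port": 5432}]

for mapped, original in field_map.items():
    if mapped in filters and original not in filters:
        filters[original] = filters.pop(mapped)

print([record for record in data
       if all(record.get(k) == v for k, v in filters.items())])
# [{'hostname': 'web01', 'port': 80}]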
Example #19
def expectedLabelPosition(peptide, labelStateInfo, sequence=None,
                          modPositions=None):
    """Returns a modification description of a certain label state of a peptide.

    :param peptide: Peptide sequence used to calculate the expected label state
        modifications
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels` that
        describes a label state
    :param sequence: unmodified amino acid sequence of :var:`peptide`, if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`
    :param modPositions: dictionary describing the modification state of
        "peptide", if None it is generated by
        :func:`maspy.peptidemethods.returnModPositions()`

    :returns: {sequence position: sorted list of expected label modifications
                  on that position, ...
               }
    """
    if modPositions is None:
        modPositions = maspy.peptidemethods.returnModPositions(peptide,
                                                               indexStart=0
                                                               )
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    currLabelMods = dict()
    for labelPosition, labelSymbols in viewitems(labelStateInfo['aminoAcidLabels']):
        labelSymbols = aux.toList(labelSymbols)
        if labelSymbols == ['']:
            pass
        elif labelPosition == 'nTerm':
            currLabelMods.setdefault(0, list())
            currLabelMods[0].extend(labelSymbols)
        else:
            for sequencePosition in aux.findAllSubstrings(sequence,
                                                          labelPosition):
                currLabelMods.setdefault(sequencePosition, list())
                currLabelMods[sequencePosition].extend(labelSymbols)

    if labelStateInfo['excludingModifications'] is not None:
        for excludingMod, excludedLabelSymbol in viewitems(labelStateInfo['excludingModifications']):
            if excludingMod not in modPositions:
                continue
            for excludingModPos in modPositions[excludingMod]:
                if excludingModPos not in currLabelMods:
                    continue
                if excludedLabelSymbol not in currLabelMods[excludingModPos]:
                    continue
                if len(currLabelMods[excludingModPos]) == 1:
                    del currLabelMods[excludingModPos]
                else:
                    excludedModIndex = currLabelMods[excludingModPos].index(excludedLabelSymbol)
                    currLabelMods[excludingModPos].pop(excludedModIndex)

    for sequencePosition in list(viewkeys(currLabelMods)):
        currLabelMods[sequencePosition] = sorted(currLabelMods[sequencePosition])
    return currLabelMods
Example #20
def blockTraining(pairs,
                  predicate_set,
                  eta=.1,
                  epsilon=0,
                  matching = "Dedupe"):
    '''
    Takes in a set of training pairs and predicates and tries to find
    a good set of blocking rules.
    '''

    blocker = blocking.Blocker(predicate_set)
    prepare_index(blocker, pairs, matching)

    if len(pairs['match']) < 50 :
        compound_length = 2
    else :
        compound_length = 3

    dupe_cover = cover(blocker, pairs['match'], compound_length)
    distinct_cover = cover(blocker, pairs['distinct'], compound_length)

    distinct_count = defaultdict(int, {pred : len(pairs)
                                       for pred, pairs
                                       in viewitems(distinct_cover)})

    # Throw away the predicates that cover too many distinct pairs
    coverage_threshold = eta * len(pairs['distinct'])
    logger.info("coverage threshold: %s", coverage_threshold)
    dupe_cover = {pred : pairs
                  for pred, pairs
                  in viewitems(dupe_cover)
                  if distinct_count[pred] < coverage_threshold}

    if not dupe_cover : 
        raise ValueError(NO_PREDICATES_ERROR)

    uncoverable_dupes = set(pairs['match']) - set.union(*viewvalues(dupe_cover))

    if len(uncoverable_dupes) > epsilon :
        logger.warning(OUT_OF_PREDICATES_WARNING)
        logger.debug(uncoverable_dupes)
        epsilon = 0
    else :
        epsilon -= len(uncoverable_dupes)

    chvatal_set = greedy(dupe_cover.copy(), distinct_count, epsilon)

    dupe_cover = {pred : dupe_cover[pred] for pred in chvatal_set}
        
    final_predicates = tuple(dominating(dupe_cover))

    logger.info('Final predicate set:')
    for predicate in final_predicates :
        logger.info(predicate)

    return final_predicates
Example #21
def defaultFetchSiAttrFromSmi(smi, si):
    """Default method to extract attributes from a spectrum metadata item (smi)
    and add them to a spectrum item (si)."""
    for key, value in viewitems(fetchSpectrumInfo(smi)):
        setattr(si, key, value)
    for key, value in viewitems(fetchScanInfo(smi)):
        setattr(si, key, value)
    if si.msLevel > 1:
        for key, value in viewitems(fetchParentIon(smi)):
            setattr(si, key, value)
Example #22
def get_project_lib(regen=False):
    global projectlib
    if regen is False and projectlib:
        return projectlib
    projectlib = {}
    for project, folder in viewitems(projects.pcbs):
        projectlib[project] = PCBPrototype(project)
    for project, folder in viewitems(projects.cable_projects):
        projectlib[project] = CableProjectPrototype(project)
    return projectlib
Example #23
 def callback(stats):
     global COUNTER
     for (stat,val) in viewitems(stats):
         diagnostics[stat].append(val)
     if args.plot:
         animate_rollout(env, agent, min(500, args.timestep_limit))
     print("*********** Iteration %i ****************" % COUNTER)
     print(tabulate([ (k, v) for k, v in viewitems(stats) if np.asarray(v).size== 1 ])) #pylint: disable=W0110
     COUNTER += 1
     if args.snapshot_every and ((COUNTER % args.snapshot_every==0) or (COUNTER==args.n_iter)): 
         hdf['/agent_snapshots/%0.4i'%COUNTER] = np.array(cPickle.dumps(agent,-1))
Example #24
 def consistency_check(self):
     """Ensure internal structures are consistent with each others"""
     assert set(self._loc_key_to_names).issubset(self._loc_keys)
     assert set(self._loc_key_to_offset).issubset(self._loc_keys)
     assert self._loc_key_to_offset == {v: k for k, v in viewitems(self._offset_to_loc_key)}
     assert reduce(
         lambda x, y:x.union(y),
         viewvalues(self._loc_key_to_names),
         set(),
     ) == set(self._name_to_loc_key)
     for name, loc_key in viewitems(self._name_to_loc_key):
         assert name in self._loc_key_to_names[loc_key]
Example #25
    def _build_biom_tables(self, samples, rarefaction_depth):
        """Build tables and add them to the analysis"""
        with qdb.sql_connection.TRN:
            # filter and combine all study BIOM tables needed for
            # each data type
            new_tables = {dt: None for dt in self.data_types}
            base_fp = qdb.util.get_work_base_dir()
            for a_id, samps in viewitems(samples):
                # one biom table attached to each artifact object
                artifact = qdb.artifact.Artifact(a_id)
                table_fp = None
                for _, fp, fp_type in artifact.filepaths:
                    if fp_type == 'biom':
                        table_fp = fp
                        break
                if not table_fp:
                    raise RuntimeError(
                        "Artifact %s does not have a biom table associated"
                        % a_id)
                table = load_table(table_fp)
                # HACKY WORKAROUND FOR DEMO. Issue # 246
                # make sure samples not in biom table are not filtered for
                table_samps = set(table.ids())
                filter_samps = table_samps.intersection(samps)
                # add the metadata column for the study the samples come from
                study_meta = {'Study': artifact.study.title,
                              'Processed_id': artifact.id}
                samples_meta = {sid: study_meta for sid in filter_samps}
                # filter for just the wanted samples and merge into new table
                # this if/else setup avoids needing a blank table to
                # start merges
                table.filter(filter_samps, axis='sample', inplace=True)
                table.add_metadata(samples_meta, axis='sample')
                data_type = artifact.data_type
                if new_tables[data_type] is None:
                    new_tables[data_type] = table
                else:
                    new_tables[data_type] = new_tables[data_type].merge(table)

            # add the new tables to the analysis
            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
            for dt, biom_table in viewitems(new_tables):
                # rarefy, if specified
                if rarefaction_depth is not None:
                    biom_table = biom_table.subsample(rarefaction_depth)
                # write out the file
                biom_fp = join(base_fp, "%d_analysis_%s.biom" % (self._id, dt))
                with biom_open(biom_fp, 'w') as f:
                    biom_table.to_hdf5(f, "Analysis %s Datatype %s" %
                                       (self._id, dt))
                self._add_file("%d_analysis_%s.biom" % (self._id, dt),
                               "biom", data_type=dt)
Example #26
File: sem.py Project: cea-sec/miasm
def symb_exec(lbl, ir_arch, ircfg, inputstate, debug):
    sympool = dict(regs_init)
    sympool.update(inputstate)
    symexec = SymbolicExecutionEngine(ir_arch, sympool)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    return {
        k: v for k, v in viewitems(symexec.symbols)
        if k not in EXCLUDE_REGS and regs_init.get(k, None) != v
    }
Example #27
 def __init__(self, *args, **kwargs):
     """Reads kwargs as properties of self."""
     # perform init on temp dict to preserve interface: will then translate
     # aliased keys when loading into self
     temp = {}
     unalias = self.unalias
     dict.__init__(temp, *args, **kwargs)
     for key, val in viewitems(temp):
         self[unalias(key)] = val
     for name, prototype in viewitems(self.Required):
         new_name = unalias(name)
         if new_name not in self:
             self[new_name] = self._copy(prototype)
Example #28
def store_survey(survey, survey_id):
    """Store the survey

    Parameters
    ----------
    survey : amgut.lib.data_access.survey.Survey
        The corresponding survey
    survey_id : str
        The corresponding survey ID to retrieve from redis
    """
    def get_survey_question_id(key):
        return int(key.split('_')[-2])

    data = redis.hgetall(survey_id)
    to_store = PartitionResponse(survey.question_types)
    consent_details = loads(data.pop('consent'))

    if 'existing' in data:
        data.pop('existing')

    for page in data:
        page_data = loads(data[page])
        questions = page_data['questions']

        for quest, resps in viewitems(questions):
            qid = get_survey_question_id(quest)
            qtype = survey.question_types[qid]

            if resps is None:
                resps = {-1}  # unspecified multiple choice
            elif qtype in ['SINGLE', 'MULTIPLE']:
                resps = set([int(i) for i in resps])
            else:
                pass

            to_store[qid] = resps

    with_fk_inserts = []
    for qid, indices in viewitems(to_store.with_fk):
        question = survey.questions[qid]

        for idx in indices:
            resp = question.responses[idx] if idx != -1 else survey.unspecified
            with_fk_inserts.append((survey_id, qid, resp))

    without_fk_inserts = [(survey_id, qid, dumps(v))
                          for qid, v in viewitems(to_store.without_fk)]

    survey.store_survey(consent_details, with_fk_inserts, without_fk_inserts)
Example #29
 def _update_accumulators(cls, elt, accumulators,
                          on_create_set, on_match_set):
     on_create_set.extend(["%s.%s = [%s]" % (elt, field, srcfield)
                           for field, (srcfield, _) in
                           viewitems(accumulators)])
     on_match_set.extend([
         ("%(elt)s.%(field)s = CASE WHEN " +
          ("" if maxvalue is None else
           "SIZE(%(elt)s.%(field)s) > %(maxvalue)d OR ") +
          "%(srcfield)s IN %(elt)s.%(field)s THEN %(elt)s.%(field)s ELSE " +
          "COALESCE(%(elt)s.%(field)s, []) + %(srcfield)s END") % {
              "elt": elt, "field": field, "srcfield": srcfield,
              "maxvalue": maxvalue
          } for field, (srcfield, maxvalue) in viewitems(accumulators)
     ])
Example #30
    def _show_dependencies(self):
        """Show dependencies"""
        created = self.created
        departing_arrows = self.departing_arrows

        self._fix_dependencies()

        for source, targets in viewitems(departing_arrows):
            if source not in created:
                continue
            for target, style in viewitems(targets):
                if target not in created or source == target:
                    continue
                dep = (variable_id(source), variable_id(target))
                self.dependencies[dep] = style
Example #31
File: ir.py Project: yarhrn/miasm
 def iteritems(self):
     for dst, src in viewitems(self._assigns):
         yield dst, src
Example #32
File: ir.py Project: yarhrn/miasm
 def items(self):
     return [(dst, src) for dst, src in viewitems(self._assigns)]
Example #33
File: ir.py Project: yarhrn/miasm
 def __eq__(self, other):
     if set(self.keys()) != set(other.keys()):
         return False
     return all(other[dst] == src for dst, src in viewitems(self))
Example #34
File: ir.py Project: yarhrn/miasm
 def __str__(self):
     out = []
     for dst, src in sorted(viewitems(self._assigns)):
         out.append("%s = %s" % (dst, src))
     return "\n".join(out)
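Examples 31 through 34 all wrap the same _assigns dict. A minimal dict-backed stand-in (not the real miasm AssignBlock, whose constructor and semantics differ) shows the four methods working together:

class AssignSketch(dict):
    """Toy stand-in for the dict-backed container used above."""

    def __init__(self, assigns):
        dict.__init__(self, assigns)
        self._assigns = dict(assigns)

    def iteritems(self):
        for dst, src in self._assigns.items():
            yield dst, src

    def items(self):
        return [(dst, src) for dst, src in self._assigns.items()]

    def __eq__(self, other):
        if set(self.keys()) != set(other.keys()):
            return False
        return all(other[dst] == src for dst, src in self.items())

    def __str__(self):
        out = []
        for dst, src in sorted(self._assigns.items()):
            out.append("%s = %s" % (dst, src))
        return "\n".join(out)


a = AssignSketch({"EAX": "EBX + 1"})
b = AssignSketch({"EAX": "EBX + 1"})
print(a == b)  # True
print(a)       # EAX = EBX + 1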
Example #35
    def _build_biom_tables(self,
                           samples,
                           rarefaction_depth=None,
                           rename_dup_samples=False):
        """Build tables and add them to the analysis"""
        with qdb.sql_connection.TRN:
            base_fp = qdb.util.get_work_base_dir()

            # this assumes that there is only one reference/pipeline for each
            # data_type issue #164
            new_tables = {dt: None for dt in self.data_types}
            for aid, samps in viewitems(samples):
                artifact = qdb.artifact.Artifact(aid)
                # this is not checking the reference used for picking
                # issue #164
                biom_table_fp = None
                for _, fp, fp_type in artifact.filepaths:
                    if fp_type == 'biom':
                        biom_table_fp = fp
                        break
                if not biom_table_fp:
                    raise RuntimeError(
                        "Artifact %s does not have a biom table associated" %
                        aid)
                biom_table = load_table(biom_table_fp)
                # filtering samples to keep those selected by the user
                biom_table_samples = set(biom_table.ids())
                selected_samples = biom_table_samples.intersection(samps)
                biom_table.filter(selected_samples,
                                  axis='sample',
                                  inplace=True)

                if rename_dup_samples:
                    ids_map = {
                        _id: "%d.%s" % (aid, _id)
                        for _id in biom_table.ids()
                    }
                    biom_table.update_ids(ids_map, 'sample', True, True)

                # add the metadata column for the study the samples come from;
                # this is useful in case the user downloads the bioms
                study_md = {'Study': artifact.study.title, 'Artifact_id': aid}
                samples_md = {sid: study_md for sid in selected_samples}
                biom_table.add_metadata(samples_md, axis='sample')
                data_type = artifact.data_type

                # this is not checking the reference used for picking
                # issue #164
                if new_tables[data_type] is None:
                    new_tables[data_type] = biom_table
                else:
                    new_tables[data_type] = \
                        new_tables[data_type].merge(biom_table)

            # add the new tables to the analysis
            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
            for dt, biom_table in viewitems(new_tables):
                if biom_table is None:
                    continue
                # rarefy, if specified
                if rarefaction_depth is not None:
                    biom_table = biom_table.subsample(rarefaction_depth)
                # write out the file
                biom_fp = join(base_fp, "%d_analysis_%s.biom" % (self._id, dt))
                with biom_open(biom_fp, 'w') as f:
                    biom_table.to_hdf5(
                        f, "Analysis %s Datatype %s" % (self._id, dt))
                self._add_file("%d_analysis_%s.biom" % (self._id, dt),
                               "biom",
                               data_type=dt)
Example #36
 def __init__(self, symbols):
     tmp = {}
     for expr, types in viewitems(symbols):
         tmp[expr] = frozenset(types)
     self._symbols = frozenset(viewitems(tmp))
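The double frozenset above yields a hashable, order-independent snapshot of the mapping, so it can be used as a dictionary key or stored in a set. A small illustration with made-up expression names:

symbols = {"EAX": ["int", "ptr"], "EBX": ["int"]}  # hypothetical expr -> types
snapshot = frozenset((expr, frozenset(types))
                     for expr, types in symbols.items())
cache = {snapshot: "already analysed"}
print(cache[snapshot])  # already analysed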
Example #37
def artifact_post_req(user_id,
                      filepaths,
                      artifact_type,
                      name,
                      prep_template_id,
                      artifact_id=None):
    """Creates the initial artifact for the prep template

    Parameters
    ----------
    user_id : str
        User adding the artifact
    filepaths : dict of str
        Comma-separated list of files to attach to the artifact,
        keyed by file type
    artifact_type : str
        The type of the artifact
    name : str
        Name to give the artifact
    prep_template_id : int or str castable to int
        Prep template to attach the artifact to
    artifact_id : int or str castable to int, optional
        The id of the imported artifact

    Returns
    -------
    dict of objects
        A dictionary containing the new artifact ID
        {'status': status,
         'message': message,
         'artifact': id}
    """
    prep_template_id = int(prep_template_id)
    prep = PrepTemplate(prep_template_id)
    study_id = prep.study_id

    # First check if the user has access to the study
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    user = User(user_id)

    if artifact_id:
        # if the artifact id has been provided, import the artifact
        qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
        cmd = qiita_plugin.get_command('copy_artifact')
        params = Parameters.load(cmd,
                                 values_dict={
                                     'artifact': artifact_id,
                                     'prep_template': prep.id
                                 })
        job = ProcessingJob.create(user, params)
    else:
        uploads_path = get_mountpoint('uploads')[0][1]
        path_builder = partial(join, uploads_path, str(study_id))
        cleaned_filepaths = {}

        for ftype, file_list in viewitems(filepaths):
            # JavaScript sends us this list as a comma-separated list
            for fp in file_list.split(','):
                # JavaScript will send this value as an empty string if the
                # list of files was empty. In such case, the split will
                # generate a single element containing the empty string. Check
                # for that case here and, if fp is not the empty string,
                # proceed to check if the file exists
                if fp:
                    # Check if filepath being passed exists for study
                    full_fp = path_builder(fp)
                    exists = check_fp(study_id, full_fp)
                    if exists['status'] != 'success':
                        return {
                            'status': 'error',
                            'message': 'File does not exist: %s' % fp
                        }
                    if ftype not in cleaned_filepaths:
                        cleaned_filepaths[ftype] = []
                    cleaned_filepaths[ftype].append(full_fp)

        # This should never happen, but it doesn't hurt to have an
        # explicit check, in case there is something odd with the JS
        if not cleaned_filepaths:
            return {
                'status': 'error',
                'message': "Can't create artifact, no files provided."
            }

        command = Command.get_validator(artifact_type)
        job = ProcessingJob.create(
            user,
            Parameters.load(command,
                            values_dict={
                                'template': prep_template_id,
                                'files': dumps(cleaned_filepaths),
                                'artifact_type': artifact_type,
                                'name': name
                            }))

    # Submit the job
    job.submit()

    r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep.id,
                 dumps({
                     'job_id': job.id,
                     'is_qiita_job': True
                 }))

    return {'status': 'success', 'message': ''}
Example #38
def raster_calc(output,
                equation=None,
                out_type='byte',
                extent=None,
                overwrite=False,
                be_quiet=False,
                out_no_data=0,
                row_block_size=2000,
                col_block_size=2000,
                apply_all_bands=False,
                **kwargs):
    """
    Raster calculator

    Args:
        output (str): The output image.
        equation (Optional[str]): The equation to calculate.
        out_type (Optional[str]): The output raster storage type. Default is 'byte'.
        extent (Optional[str]): An image or instance of ``mappy.ropen`` to use for the output extent. Default is None.
        overwrite (Optional[bool]): Whether to overwrite an existing output image. Default is False.
        be_quiet (Optional[bool]): Whether to be quiet and do not report progress. Default is False.
        out_no_data (Optional[int]): The output no data value. Default is 0.
        row_block_size (Optional[int]): The row block chunk size. Default is 2000.
        col_block_size (Optional[int]): The column block chunk size. Default is 2000.
        apply_all_bands (Optional[bool]): Whether to apply the equation to all bands. Default is False.
        **kwargs (str): The rasters to compute. E.g., A='/some_raster1.tif', F='/some_raster2.tif'.
            Band positions default to 1 unless given as [A]_band.

    Examples:
        >>> from mpglue.raster_calc import raster_calc
        >>>
        >>> # Multiply image A x image B
        >>> raster_calc('/output.tif',
        >>>             equation='A * B',
        >>>             A='/some_raster1.tif',
        >>>             B='some_raster2.tif')
        >>>
        >>> # Reads as...
        >>> # Where image A equals 1 AND image B is greater than 5,
        >>> #   THEN write image A, OTHERWISE write 0
        >>> raster_calc('/output.tif',
        >>>             equation='where((A == 1) & (B > 5), A, 0)',
        >>>             A='/some_raster1.tif',
        >>>             B='some_raster2.tif')
        >>>
        >>> # Use different bands from the same image. The letter given for the
        >>> #   image must be the same for the band, followed by _band.
        >>> # E.g., for raster 'n', the corresponding band would be 'n_band'. For
        >>> #   raster 'r', the corresponding band would be 'r_band', etc.
        >>> raster_calc('/output.tif',
        >>>             equation='(n - r) / (n + r)',
        >>>             n='/some_raster.tif',
        >>>             n_band=4,
        >>>             r='/some_raster.tif',
        >>>             r_band=3)

    Returns:
        None, writes to ``output``.
    """

    # Set the image dictionary
    image_dict = dict()
    info_dict = dict()
    info_list = list()
    band_dict = dict()

    temp_files = list()

    if isinstance(extent, str):

        ot_info = raster_tools.ropen(extent)

        temp_dict = copy(kwargs)

        for kw, vw in viewitems(kwargs):

            if isinstance(vw, str):

                d_name, f_name = os.path.split(vw)
                f_base, __ = os.path.splitext(f_name)

                vw_sub = os.path.join(d_name, '{}_temp.vrt'.format(f_base))

                raster_tools.translate(vw,
                                       vw_sub,
                                       format='VRT',
                                       projWin=[
                                           ot_info.left, ot_info.top,
                                           ot_info.right, ot_info.bottom
                                       ])

                temp_files.append(vw_sub)

                temp_dict[kw] = vw_sub

        kwargs = temp_dict

    for kw, vw in viewitems(kwargs):

        if '_band' not in kw:
            band_dict['{}_band'.format(kw)] = 1

        if isinstance(vw, str):

            image_dict[kw] = vw

            exec('i_info_{} = raster_tools.ropen(r"{}")'.format(kw, vw))
            exec('info_dict["{}"] = i_info_{}'.format(kw, kw))
            exec('info_list.append(i_info_{})'.format(kw))

        if isinstance(vw, int):
            band_dict[kw] = vw

    for key, value in viewitems(image_dict):
        equation = equation.replace(key, 'marrvar_{}'.format(key))

    # Check for NumPy functions.
    # for np_func in dir(np):
    #
    #     if 'np.' + np_func in equation:
    #
    #         equation = 'np.{}'.format(equation)
    #         break

    for kw, vw in viewitems(info_dict):

        o_info = copy(vw)
        break

    n_bands = 1 if not apply_all_bands else o_info.bands

    if isinstance(extent, raster_tools.ropen):

        # Set the extent from an object.
        overlap_info = extent

    elif isinstance(extent, str):

        # Set the extent from an existing image.
        overlap_info = raster_tools.ropen(extent)

    else:

        # Check overlapping extent
        overlap_info = info_list[0].copy()

        for i_ in range(1, len(info_list)):

            # Get the minimum overlapping extent
            # from all input images.
            overlap_info = raster_tools.GetMinExtent(overlap_info,
                                                     info_list[i_])

    o_info.update_info(left=overlap_info.left,
                       right=overlap_info.right,
                       top=overlap_info.top,
                       bottom=overlap_info.bottom,
                       rows=overlap_info.rows,
                       cols=overlap_info.cols,
                       storage=out_type,
                       bands=n_bands)

    if overwrite:
        overwrite_file(output)

    out_rst = raster_tools.create_raster(output, o_info)

    if n_bands == 1:
        out_rst.get_band(1)

    block_rows, block_cols = raster_tools.block_dimensions(
        o_info.rows,
        o_info.cols,
        row_block_size=row_block_size,
        col_block_size=col_block_size)

    if not be_quiet:
        ctr, pbar = _iteration_parameters(o_info.rows, o_info.cols, block_rows,
                                          block_cols)

    # Iterate over the minimum overlapping extent.
    for i in range(0, o_info.rows, block_rows):

        n_rows = raster_tools.n_rows_cols(i, block_rows, o_info.rows)

        for j in range(0, o_info.cols, block_cols):

            n_cols = raster_tools.n_rows_cols(j, block_cols, o_info.cols)

            # For each image, get the offset and
            # convert bands in the equation to ndarrays.
            for key, value in viewitems(image_dict):

                # exec 'x_off, y_off = vector_tools.get_xy_offsets3(overlap_info, i_info_{})'.format(key)
                x_off, y_off = vector_tools.get_xy_offsets(
                    image_info=info_dict[key],
                    x=overlap_info.left,
                    y=overlap_info.top,
                    check_position=False)[2:]

                exec(
                    'marrvar_{KEY} = info_dict["{KEY}"].read(bands2open=band_dict["{KEY}_band"], i=i+y_off, j=j+x_off, rows=n_rows, cols=n_cols, d_type="float32")'
                    .format(KEY=key))

            if '&&' in equation:

                out_array = np.empty((n_bands, n_rows, n_cols),
                                     dtype='float32')

                for eqidx, equation_ in enumerate(equation.split('&&')):

                    if 'nan_to_num' in equation_:

                        if not equation_.startswith('np.'):
                            equation_ = 'np.' + equation_

                        equation_ = 'out_array[eqidx] = {}'.format(equation_)
                        exec(equation_)

                    else:
                        out_array[eqidx] = ne.evaluate(equation_)

            else:

                if 'nan_to_num' in equation:

                    equation_ = 'out_array = {}'.format(equation)
                    exec(equation_)

                else:
                    out_array = ne.evaluate(equation)

            # Set the output no data values.
            out_array[np.isnan(out_array) | np.isinf(out_array)] = out_no_data

            if n_bands == 1:

                out_rst.write_array(out_array, i=i, j=j)

            else:

                for lidx in range(0, n_bands):

                    out_rst.write_array(out_array[lidx],
                                        i=i,
                                        j=j,
                                        band=lidx + 1)

            if not be_quiet:

                pbar.update(ctr)
                ctr += 1

    if not be_quiet:
        pbar.finish()

    # Close the input image.
    for key, value in viewitems(info_dict):
        info_dict[key].close()

    # close the output drivers
    out_rst.close_all()

    out_rst = None

    # Cleanup
    for temp_file in temp_files:

        if os.path.isfile(temp_file):
            os.remove(temp_file)
Example #39
    def search_engine(self, item_name, criteria):
        """
        Call GLPI's search engine syntax.

        INPUT query in JSON format (/apirest.php#search-items):
        metacriteria: [
            {
                "link": "AND",
                "searchtype": "contains",
                "field": "name",
                "value": "search value"
            }
        ]

        RETURNS:
            GLPI's APIRest JSON, formatted with the result of the search in key 'data'.
        """

        # Receive the possible field ids for type item_name
        # -> to avoid wrong lookups, use uid of fields, but strip item type:
        #    example: {"1": {"uid": "Computer.name"}} gets {"name": 1}
        field_map = {}
        opts = self.search_options(item_name)
        for field_id, field_opts in viewitems(opts):
            if field_id.isdigit() and 'uid' in field_opts:
                # support case-insensitive strip from item_name!
                field_name = re.sub('^' + item_name + '.',
                                    '',
                                    field_opts['uid'],
                                    flags=re.IGNORECASE)
                field_map[field_name] = int(field_id)

        uri_query = "%s?" % item_name

        for idx, c in enumerate(criteria['criteria']):
            # build field argument
            if idx == 0:
                uri = ""
            else:
                uri = "&"
            if 'field' in c and c['field'] is not None:
                field_name = ""
                # if int given, use it directly
                if isinstance(c['field'], int) or c['field'].isdigit():
                    field_name = int(c['field'])
                # if name given, try to map to an int
                elif c['field'] in field_map:
                    field_name = field_map[c['field']]
                else:
                    raise GlpiInvalidArgument('Cannot map field name "' +
                                              c['field'] + '" to ' +
                                              'a field id for ' +
                                              str(idx + 1) + '. criterion ' +
                                              str(c))
                uri = uri + "criteria[%d][field]=%d" % (idx, field_name)
            else:
                raise GlpiInvalidArgument('Missing "field" parameter for ' +
                                          str(idx + 1) + '. criterion: ' +
                                          str(c))

            # build value argument
            if 'value' not in c or c['value'] is None:
                uri = uri + "&criteria[%d][value]=" % (idx)
            else:
                uri = uri + "&criteria[%d][value]=%s" % (idx, c['value'])

            # build searchtype argument
            # -> optional! defaults to "contains" on the server if empty
            if 'searchtype' in c and c['searchtype'] is not None:
                uri = (uri + "&criteria[%d][searchtype]=%s" % (
                    idx, c['searchtype']))
            else:
                uri = uri + "&criteria[%d][searchtype]=" % (idx)

            # link is optional for 1st criterion according to docs...
            # -> error if not present but more than one criterion
            if 'link' not in c and idx > 0:
                raise GlpiInvalidArgument('Missing link type for ' +
                                          str(idx + 1) + '. criterion ' +
                                          str(c))
            elif 'link' in c:
                uri = uri + "&criteria[%d][link]=%s" % (idx, c['link'])

            # add this criterion to the query
            uri_query = uri_query + uri

        try:
            if not self.api_has_session():
                self.init_api()

            self.update_uri('search')
            # TODO: is this call correct? shouldn't this be search_engine()?
            return self.api_rest.search_options(uri_query)

        except GlpiException as e:
            return {'{}'.format(e)}
Example #40
def displayhost(record,
                showscripts=True,
                showtraceroute=True,
                showos=True,
                out=sys.stdout):
    """Displays (on `out`, by default `sys.stdout`) the Nmap scan
    result contained in `record`.

    """
    line = "Host %s" % utils.force_int2ip(record['addr'])
    if record.get('hostnames'):
        line += " (%s)" % '/'.join(x['name'] for x in record['hostnames'])
    if 'source' in record:
        line += ' from %s' % record['source']
    if record.get('categories'):
        line += ' (%s)' % ', '.join(record['categories'])
    if 'state' in record:
        line += ' (%s' % record['state']
        if 'state_reason' in record:
            line += ': %s' % record['state_reason']
        line += ')\n'
    out.write(line)
    if 'infos' in record:
        infos = record['infos']
        if 'country_code' in infos or 'country_name' in infos:
            out.write("\t%s - %s" % (infos.get(
                'country_code', '?'), infos.get('country_name', '?')))
            if 'city' in infos:
                out.write(' - %s' % infos['city'])
            out.write('\n')
        if 'as_num' in infos or 'as_name' in infos:
            out.write("\tAS%s - %s\n" %
                      (infos.get('as_num', '?'), infos.get('as_name', '?')))
    if 'starttime' in record and 'endtime' in record:
        out.write("\tscan %s - %s\n" %
                  (record['starttime'], record['endtime']))
    for state, counts in viewitems(record.get('extraports', {})):
        out.write("\t%d ports %s (%s)\n" % (counts["total"], state, ", ".join(
            "%d %s" % (count, reason)
            for reason, count in viewitems(counts["reasons"])
            if reason != "total")))
    ports = record.get('ports', [])
    ports.sort(
        key=lambda x: (utils.key_sort_none(x.get('protocol')), x['port']))
    for port in ports:
        if port.get('port') == -1:
            record['scripts'] = port['scripts']
            continue
        if 'state_reason' in port:
            reason = " (%s)" % ', '.join([port['state_reason']] + [
                "%s=%s" % (field[13:], value)
                for field, value in viewitems(port)
                if field.startswith('state_reason_')
            ])
        else:
            reason = ""
        if 'service_name' in port:
            srv = port['service_name']
            if 'service_method' in port:
                srv += ' (%s)' % port['service_method']
            for field in [
                    'service_product', 'service_version', 'service_extrainfo',
                    'service_ostype', 'service_hostname'
            ]:
                if field in port:
                    srv += ' %s' % port[field]
        else:
            srv = ""
        out.write("\t%-10s%-8s%-22s%s\n" %
                  ('%s/%d' % (port.get('protocol'), port['port']),
                   port['state_state'], reason, srv))
        if showscripts:
            out.writelines(_scriptoutput(port))
    if showscripts:
        scripts = _scriptoutput(record)
        if scripts:
            out.write('\tHost scripts:\n')
            out.writelines(scripts)
    if showtraceroute and record.get('traces'):
        for trace in record['traces']:
            proto = trace['protocol']
            if proto in ['tcp', 'udp']:
                proto += '/%d' % trace['port']
            out.write('\tTraceroute (using %s)\n' % proto)
            hops = trace['hops']
            hops.sort(key=lambda hop: hop['ttl'])
            for hop in hops:
                out.write('\t\t%3s %15s %7s\n' % (
                    hop['ttl'],
                    utils.force_int2ip(hop['ipaddr']),
                    hop['rtt'],
                ))
    if showos and record.get('os', {}).get('osclass'):
        osclasses = record['os']['osclass']
        maxacc = str(max(int(x['accuracy']) for x in osclasses))
        osclasses = [
            osclass for osclass in osclasses if osclass['accuracy'] == maxacc
        ]
        out.write('\tOS fingerprint\n')
        for osclass in osclasses:
            out.write('\t\t%(osfamily)s / %(type)s / %(vendor)s / '
                      'accuracy = %(accuracy)s\n' % osclass)
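
A minimal usage sketch, assuming displayhost and the ivre utils module are available as in this example's module; the record below is made up and only contains fields the function reads:

record = {
    'addr': '198.51.100.7',
    'hostnames': [{'name': 'scanme.example'}],
    'state': 'up',
    'ports': [
        {'protocol': 'tcp', 'port': 22, 'state_state': 'open',
         'service_name': 'ssh'},
        {'protocol': 'tcp', 'port': 80, 'state_state': 'open',
         'service_name': 'http', 'service_product': 'nginx'},
    ],
}
# scripts, traceroute and OS display are disabled since the record has none
displayhost(record, showscripts=False, showtraceroute=False, showos=False)
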
Example no. 41
0
def build_graph(start_addr,
                type_graph,
                simplify=False,
                use_ida_stack=True,
                dontmodstack=False,
                loadint=False,
                verbose=False):
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack,
                                      self).call_effects(addr, instr)
            if use_ida_stack:
                stk_before = idc.get_spd(instr.offset)
                stk_after = idc.get_spd(instr.offset + instr.l)
                stk_diff = stk_after - stk_before
                print(hex(stk_diff))
                call_assignblk = AssignBlock([
                    ExprAssign(self.ret_reg, ExprOp('call_func_ret', addr)),
                    ExprAssign(self.sp,
                               self.sp + ExprInt(stk_diff, self.sp.size))
                ], instr)
                return [call_assignblk], []
            else:
                if not dontmodstack:
                    return assignblks, extra
                out = []
                for assignblk in assignblks:
                    dct = dict(assignblk)
                    dct = {
                        dst: src
                        for (dst, src) in viewitems(dct) if dst != self.sp
                    }
                    out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print("Arch", dis_engine)

    fname = idc.get_root_filename()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    loc_db = LocationDB()

    mdis = dis_engine(bs, loc_db=loc_db)
    ir_arch = IRADelModCallStack(loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (loc_db.get_offset_location(addr)
                or loc_db.get_name_location(name)):
            # Symbol alias
            continue
        loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)
    entry_points = set([loc_db.get_offset_location(start_addr)])
    if verbose:
        print("generating graph")
        open('asm_flow.dot', 'w').write(asmcfg.dot())
        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(loc_db, irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        # ircfg.dot() returns text, so open the file in text mode
        open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'w').write(out)
    title = "Miasm IR graph"

    head = list(entry_points)[0]

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)
        title += " (simplified)"

    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    class IRAOutRegs(ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        new_assignblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assignblk)
        new_irblock = IRBlock(irblock.loc_db, irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA,
                             self).do_simplify(ssa, head)
            if loadint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    head = list(entry_points)[0]
    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    open('final.dot', 'w').write(ircfg.dot())

    graph = GraphMiasmIR(ircfg, title, None)
    graph.Show()
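
A standalone illustration (plain strings instead of Miasm expressions) of the filtering idea used by IRADelModCallStack above: every assignment whose destination is the stack pointer is dropped from the assignment block:

sp = 'ESP'  # stands in for self.sp
assignblk = {
    'ESP': 'ESP - 0x4',
    'EAX': 'call_func_ret(sub_401000)',
}
# keep everything except writes to the stack pointer
filtered = {dst: src for dst, src in assignblk.items() if dst != sp}
print(filtered)  # {'EAX': 'call_func_ret(sub_401000)'}
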
Example no. 42
0
    def _build_mapping_file(self, samples, rename_dup_samples=False):
        """Builds the combined mapping file for all samples
           Code modified slightly from qiime.util.MetadataMap.__add__"""
        with qdb.sql_connection.TRN:
            # query to get the latest qiime mapping file
            sql = """SELECT filepath
                     FROM qiita.filepath
                        JOIN qiita.prep_template_filepath USING (filepath_id)
                        JOIN qiita.prep_template USING (prep_template_id)
                        JOIN qiita.filepath_type USING (filepath_type_id)
                     WHERE filepath_type = 'qiime_map'
                        AND artifact_id IN (SELECT *
                                            FROM qiita.find_artifact_roots(%s))
                     ORDER BY filepath_id DESC LIMIT 1"""
            _, fp = qdb.util.get_mountpoint('templates')[0]

            all_ids = set()
            to_concat = []
            for aid, samps in viewitems(samples):
                qdb.sql_connection.TRN.add(sql, [aid])
                qm_fp = qdb.sql_connection.TRN.execute_fetchindex()[0][0]

                # Parse the mapping file
                qm = qdb.metadata_template.util.load_template_to_dataframe(
                    join(fp, qm_fp), index='#SampleID')

                # if we are not going to merge duplicated samples,
                # prepend the artifact id to the sample name
                if rename_dup_samples:
                    qm['original_SampleID'] = qm.index
                    qm['#SampleID'] = "%d." % aid + qm.index
                    qm['qiita_aid'] = aid
                    samps = ['%d.%s' % (aid, _id) for _id in samps]
                    qm.set_index('#SampleID', inplace=True, drop=True)
                else:
                    samps = set(samps) - all_ids
                    all_ids.update(samps)

                qm = qm.loc[samps]
                to_concat.append(qm)

            merged_map = pd.concat(to_concat)

            # forcing QIIME column order
            cols = merged_map.columns.values.tolist()
            cols.remove('BarcodeSequence')
            cols.remove('LinkerPrimerSequence')
            cols.remove('Description')
            cols = (['BarcodeSequence', 'LinkerPrimerSequence'] + cols +
                    ['Description'])
            merged_map = merged_map[cols]

            # Save the mapping file
            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
            mapping_fp = join(base_fp, "%d_analysis_mapping.txt" % self._id)
            merged_map.to_csv(mapping_fp,
                              index_label='#SampleID',
                              na_rep='unknown',
                              sep='\t')

            self._add_file("%d_analysis_mapping.txt" % self._id, "plain_text")
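
A standalone sketch (made-up data) of the rename_dup_samples branch above: sample IDs are prefixed with the artifact id so that the same sample coming from two artifacts stays distinct in the merged mapping file:

import pandas as pd

aid = 4
qm = pd.DataFrame({'BarcodeSequence': ['ACGT', 'TTGA']},
                  index=['S1', 'S2'])
qm['original_SampleID'] = qm.index
qm['#SampleID'] = "%d." % aid + qm.index
qm['qiita_aid'] = aid
qm.set_index('#SampleID', inplace=True, drop=True)
print(qm.index.tolist())  # ['4.S1', '4.S2']
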
Example no. 43
0
    def populate(self, mtime):
        self._mtime = mtime
        self.collection.subscribe(self.on_event)
        for entry, item in viewitems(self.collection):
            self.new_entry(entry, item, self.mtime())
Example no. 44
0
    def items(self):
        # viewitems() expects the mapping itself, not the result of .items()
        return viewitems(self._entries)
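
For reference, a minimal standalone demonstration of viewitems itself, assuming the future.utils implementation used throughout these examples:

from future.utils import viewitems

d = {'a': 1, 'b': 2}
for key, value in viewitems(d):
    # on Python 2 this is d.viewitems(), on Python 3 it is d.items()
    print("%s=%d" % (key, value))
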
Example no. 45
0
def main():
    try:
        import argparse
        parser = argparse.ArgumentParser(description=DESCRIPTION)
    except ImportError:
        import optparse
        parser = optparse.OptionParser(description=DESCRIPTION)
        parser.parse_args_orig = parser.parse_args
        parser.parse_args = lambda: parser.parse_args_orig()[0]
        parser.add_argument = parser.add_option
    parser.add_argument('--init',
                        '--purgedb',
                        action='store_true',
                        help='Purge or create and initialize the database.')
    parser.add_argument(
        '--ensure-indexes',
        action='store_true',
        help='Create missing indexes (will lock the database).')
    parser.add_argument('--node-filters',
                        '-n',
                        nargs="+",
                        help='Filter the results with a list of ivre specific '
                        'node textual filters (see WebUI doc in FLOW.md).')
    parser.add_argument('--flow-filters',
                        '-f',
                        nargs="+",
                        help='Filter the results with a list of ivre specific '
                        'flow textual filters (see WebUI doc in FLOW.md).')
    parser.add_argument('--json',
                        '-j',
                        action='store_true',
                        help='Outputs the full json records of results.')
    parser.add_argument('--count',
                        '-c',
                        action='store_true',
                        help='Only return the count of the results.')
    parser.add_argument('--limit',
                        '-l',
                        type=int,
                        help='Output at most LIMIT results.')
    parser.add_argument('--skip',
                        type=int,
                        default=0,
                        help='Skip first SKIP results.')
    parser.add_argument('--orderby',
                        '-o',
                        help='Order of results ("src", "dst" or "flow")')
    parser.add_argument('--separator', '-s', help="Separator string.")
    parser.add_argument('--top',
                        '-t',
                        nargs="+",
                        help='Top flows for a given set of fields, e.g. '
                        '"--top src.addr dport".')
    parser.add_argument(
        '--collect',
        '-C',
        nargs="+",
        help='When using --top, also collect these properties.')
    parser.add_argument('--sum',
                        '-S',
                        nargs="+",
                        help='When using --top, sum on these properties to '
                        'order the result.')
    parser.add_argument('--mode',
                        '-m',
                        help="Query special mode (flow_map, talk_map...)")
    parser.add_argument('--timeline',
                        '-T',
                        action="store_true",
                        help='Retrieves the timeline of each flow')
    parser.add_argument('--flow-daily',
                        action="store_true",
                        help="Flow count per times of the day")
    parser.add_argument('--plot',
                        action="store_true",
                        help="Plot data when possible (requires matplotlib).")
    parser.add_argument('--fields',
                        nargs='+',
                        help="Display these fields for each entry.")
    args = parser.parse_args()

    out = sys.stdout

    if args.plot and plt is None:
        utils.LOGGER.critical("Matplotlib is required for --plot")
        sys.exit(-1)

    if args.init:
        if os.isatty(sys.stdin.fileno()):
            out.write('This will remove any flow result in your database. '
                      'Proceed? [y/N] ')
            ans = input()
            if ans.lower() != 'y':
                sys.exit(-1)
        db.flow.init()
        sys.exit(0)

    if args.ensure_indexes:
        if os.isatty(sys.stdin.fileno()):
            out.write('This will lock your database. Proceed? [y/N] ')
            ans = input()
            if ans.lower() != 'y':
                sys.exit(-1)
        db.flow.ensure_indexes()
        sys.exit(0)

    filters = {
        "nodes": args.node_filters or [],
        "edges": args.flow_filters or []
    }

    query = db.flow.from_filters(filters,
                                 limit=args.limit,
                                 skip=args.skip,
                                 orderby=args.orderby,
                                 mode=args.mode,
                                 timeline=args.timeline)
    sep = args.separator or ' | '
    coma = ';' if args.separator else '; '
    coma2 = ',' if args.separator else ', '
    if args.count:
        count = db.flow.count(query)
        out.write('%(clients)d clients\n%(servers)d servers\n'
                  '%(flows)d flows\n' % count)

    elif args.top:
        top = db.flow.top(query, args.top, args.collect, args.sum)
        for rec in top:
            sys.stdout.write(
                "%s%s%s%s%s\n" %
                (coma.join(str(elt)
                           for elt in rec["fields"]), sep, rec["count"], sep,
                 coma.join(
                     str(coma2.join(str(val) for val in elt))
                     for elt in rec["collected"]) if rec["collected"] else ""))

    elif args.flow_daily:
        # FIXME? fully in-memory
        if args.plot:
            plot_data = {}
        for rec in db.flow.flow_daily(query):
            out.write(
                sep.join([
                    rec["flow"], rec["time_in_day"].strftime("%T.%f"),
                    str(rec["count"])
                ]))
            out.write("\n")

            if args.plot:
                plot_data.setdefault(rec["flow"], [[], []])
                plot_data[rec["flow"]][0].append(rec["time_in_day"])
                plot_data[rec["flow"]][1].append(rec["count"])
        if args.plot:
            for flow, points in viewitems(plot_data):
                plt.plot(points[0], points[1], label=flow)
            plt.legend(loc='best')
            plt.show()

    else:
        fmt = '%%s%s%%s%s%%s' % (sep, sep)
        node_width = len('XXX.XXX.XXX.XXX')
        flow_width = len('tcp/XXXXX')
        for res in db.flow.to_iter(query):
            if args.json:
                out.write('%s\n' % res)
            else:
                elts = {}
                for elt in ["src", "flow", "dst"]:
                    elts[elt] = res[elt]['label']
                    if args.fields:
                        elts[elt] = "%s%s%s" % (
                            elts[elt], coma,
                            coma.join(
                                str(res[elt]['data'].get(field, ""))
                                for field in args.fields))
                src, flow, dst = elts["src"], elts["flow"], elts["dst"]
                node_width = max(node_width, len(src), len(dst))
                flow_width = max(flow_width, len(flow))
                if not args.separator:
                    fmt = ('%%-%ds%s%%-%ds%s%%-%ds' %
                           (node_width, sep, flow_width, sep, node_width))
                out.write(fmt % (src, flow, dst))
                if args.timeline:
                    out.write(sep)
                    out.write(
                        coma.join(
                            str(elt) for elt in sorted(res['flow']['data']
                                                       ['meta']['times'])))
                out.write('\n')
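
A standalone sketch (made-up rows) of the adaptive column layout used in the default branch above: the format string is itself built with %-formatting, so the column widths can grow as wider values are encountered:

sep = ' | '
node_width = len('XXX.XXX.XXX.XXX')
flow_width = len('tcp/XXXXX')
rows = [
    ('10.0.0.1', 'tcp/443', '10.0.0.2'),
    ('198.51.100.200', 'udp/53', '192.0.2.10'),
]
for src, flow, dst in rows:
    node_width = max(node_width, len(src), len(dst))
    flow_width = max(flow_width, len(flow))
    # '%%-%ds' expands to a left-aligned, fixed-width '%s' conversion
    fmt = '%%-%ds%s%%-%ds%s%%-%ds' % (node_width, sep, flow_width, sep,
                                      node_width)
    print(fmt % (src, flow, dst))
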
Example no. 46
0
def flatten_contigs(data):
    for name, windows in viewitems(data):
        for index, window in enumerate(windows):
            yield (name, index), dict(window)
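
A usage sketch with made-up data (viewitems imported as in the other examples): each (contig name, window index) pair is yielded together with a plain-dict copy of the window:

data = {
    'chr1': [{'cov': 10}, {'cov': 12}],
    'chr2': [{'cov': 3}],
}
for key, window in flatten_contigs(data):
    print(key, window)
# ('chr1', 0) {'cov': 10}
# ('chr1', 1) {'cov': 12}
# ('chr2', 0) {'cov': 3}
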
Example no. 47
0
def recurrent_net(
        net, cell_net, inputs, initial_cell_inputs,
        links, timestep=None, scope=None, outputs_with_grads=(0,),
        recompute_blobs_on_backward=None, forward_only=False,
):
    '''
    net: the main net the operator should be added to

    cell_net: cell_net which is executed in a recurrent fashion

    inputs: sequences to be fed into the recurrent net. Currently only one
    input is supported. It has to be in a format T x N x (D1...Dk) where T is
    the length of the sequence, N is the batch size and (D1...Dk) are the
    remaining dimensions

    initial_cell_inputs: inputs of the cell_net for timestep 0.
    Format for each input is:
        (cell_net_input_name, external_blob_with_data)

    links: a dictionary mapping cell_net input names at moment t+1 to
    output names at moment t. Currently we assume that each output becomes
    an input for the next timestep.

    timestep: name of the timestep blob to be used. If not provided, "timestep"
    is used.

    scope: internal blobs are going to be scoped in the format
    <scope_name>/<blob_name>
    If not provided, a scope name is generated automatically

    outputs_with_grads: position indices of output blobs which will receive
    error gradient (from outside the recurrent network) during backpropagation

    recompute_blobs_on_backward: a list of blobs that will be recomputed
                 for the backward pass, and thus need not be stored for
                 each forward timestep.

    forward_only: if True, only forward steps are executed
    '''
    assert len(inputs) == 1, "Only one input blob is supported so far"

    # Validate scoping
    for einp in cell_net.Proto().external_input:
        assert einp.startswith(CurrentNameScope()), \
            '''
            Cell net external inputs are not properly scoped, use
            AddScopedExternalInputs() when creating them
            '''

    input_blobs = [str(i[0]) for i in inputs]
    initial_input_blobs = [str(x[1]) for x in initial_cell_inputs]
    op_name = net.NextName('recurrent')

    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        scope_name = op_name if scope is None else scope
        return "{}/{}".format(str(scope_name), str(name))

    # determine the inputs that are considered references:
    # those not listed in inputs or initial_cell_inputs
    known_inputs = [str(b) for b in input_blobs + initial_input_blobs]
    known_inputs += [str(x[0]) for x in initial_cell_inputs]
    if timestep is not None:
        known_inputs.append(str(timestep))
    references = [
        core.BlobReference(b) for b in cell_net.Proto().external_input
        if b not in known_inputs]

    inner_outputs = list(cell_net.Proto().external_output)
    # These gradients are expected to be available during the backward pass
    inner_outputs_map = {o: o + '_grad' for o in inner_outputs}

    # compute the backward pass of the cell net
    if not forward_only:
        backward_ops, backward_mapping = core.GradientRegistry.GetBackwardPass(
            cell_net.Proto().op, inner_outputs_map)
        backward_mapping = {str(k): v for k, v in viewitems(backward_mapping)}

        backward_cell_net = core.Net("RecurrentBackwardStep")
        del backward_cell_net.Proto().op[:]

        if recompute_blobs_on_backward is not None:
            # Insert operators to re-compute the specified blobs.
            # They are added in the same order as for the forward pass, thus
            # the order is correct.
            recompute_blobs_on_backward = {str(b) for b in
                                           recompute_blobs_on_backward}

            for op in cell_net.Proto().op:
                if not recompute_blobs_on_backward.isdisjoint(set(op.output)):
                    backward_cell_net.Proto().op.extend([op])
                    # this assertion fires if the recomputed ops also produce
                    # outputs that were not declared for recomputation
                    assert set(op.output).issubset(recompute_blobs_on_backward)

        backward_cell_net.Proto().op.extend(backward_ops)
        # compute blobs used but not defined in the backward pass
        backward_ssa, backward_blob_versions = core.get_ssa(
            backward_cell_net.Proto())
        undefined = core.get_undefined_blobs(backward_ssa)

        # also add to the output list the intermediate outputs of fwd_step that
        # are used by backward.
        ssa, blob_versions = core.get_ssa(cell_net.Proto())
        scratches = [
            blob
            for blob, ver in viewitems(blob_versions)
            if (ver > 0 and
                blob in undefined and
                blob not in cell_net.Proto().external_output)
        ]
        backward_cell_net.Proto().external_input.extend(scratches)
        backward_cell_net.Proto().type = 'simple'
    else:
        backward_cell_net = None

    all_inputs = [i[1] for i in inputs] + [
        x[1] for x in initial_cell_inputs] + references
    all_outputs = []

    cell_net.Proto().type = 'rnn'

    # Internal arguments used by RecurrentNetwork operator

    # Links are in the format (blob_name, recurrent_states, offset).
    # At moment t, the corresponding data block is at position
    # t + offset in the recurrent_states tensor
    forward_links = []
    backward_links = []

    # Aliases are used to expose outputs to external world
    # Format (internal_blob, external_blob, offset)
    # Negative offset stands for going from the end,
    # positive - from the beginning
    aliases = []

    # State blobs that hold the inputs to the cell net
    recurrent_states = []

    for cell_input, _ in initial_cell_inputs:
        cell_input = str(cell_input)
        # Recurrent_states is going to be (T + 1) x ...
        # It stores all inputs and outputs of the cell net over time.
        # Or their gradients in the case of the backward pass.
        state = s(cell_input + "_states")
        states_grad = state + "_grad"
        cell_output = links[str(cell_input)]
        forward_links.append((cell_input, state, 0))
        forward_links.append((cell_output, state, 1))

        aliases.append((state, cell_output + "_all", 1))
        aliases.append((state, cell_output + "_last", -1))
        all_outputs.extend([cell_output + "_all", cell_output + "_last"])

        recurrent_states.append(state)

        if backward_cell_net is not None:
            backward_links.append((cell_output + "_grad", states_grad, 1))
            backward_cell_net.Proto().external_input.append(
                str(cell_output) + "_grad")

            recurrent_input_grad = cell_input + "_grad"
            if not backward_blob_versions.get(recurrent_input_grad, 0):
                # If nobody writes to this recurrent input gradient, we need
                # to make sure it gets to the states grad blob after all.
                # We do this by using backward_links which triggers an alias
                # This logic is being used for example in a SumOp case
                backward_links.append(
                    (backward_mapping[cell_input], states_grad, 0))
            else:
                backward_links.append((cell_input + "_grad", states_grad, 0))

    for input_t, input_blob in inputs:
        forward_links.append((str(input_t), str(input_blob), 0))

    if backward_cell_net is not None:
        for input_t, input_blob in inputs:
            backward_links.append((
                backward_mapping[str(input_t)], str(input_blob) + "_grad", 0
            ))
        backward_cell_net.Proto().external_input.extend(
            cell_net.Proto().external_input)
        backward_cell_net.Proto().external_input.extend(
            cell_net.Proto().external_output)

    def unpack_triple(x):
        if x:
            a, b, c = zip(*x)
            return a, b, c
        return [], [], []

    # Split into separate lists so we can pass them to C++,
    # where they are assembled back together
    link_internal, link_external, link_offset = unpack_triple(forward_links)
    alias_src, alias_dst, alias_offset = unpack_triple(aliases)

    recurrent_inputs = [str(x[1]) for x in initial_cell_inputs]

    # Make sure that recurrent gradients accumulate with internal gradients
    # (if a blob in the backward_cell_net receives gradient from both an
    # external connection as well as from within the backward_cell_net,
    # those gradients need to be added together, rather than one overwriting
    # the other)
    if backward_cell_net is not None:
        proto = backward_cell_net.Proto()
        operators = []
        while len(proto.op) > 0:
            op = proto.op[-1]
            proto.op.remove(op)
            operators.append(op)
        for op in operators[::-1]:
            proto.op.extend([op])
            for j, output_blob in enumerate(op.output):
                if output_blob in proto.external_input:
                    # In place operation won't cause issues because it takes
                    # existing value of a blob into account
                    if output_blob in op.input:
                        continue
                    output_blob = core.BlobReference(output_blob)
                    accum_blob = output_blob + "_accum"
                    proto.op[-1].output[j] = str(accum_blob)
                    backward_cell_net.Sum(
                        [output_blob, accum_blob],
                        [output_blob],
                    )

    backward_args = {}
    if backward_cell_net is not None:
        # backward_mapping only exists when the backward pass was built above
        backward_mapping_keys = set(viewkeys(backward_mapping))
        backward_link_internal, backward_link_external, backward_link_offset = \
            unpack_triple(backward_links)
        params = [x for x in references if x in backward_mapping_keys]
        param_grads = [
            str(backward_mapping[x])
            for x in references
            if x in backward_mapping_keys
        ]
        if recompute_blobs_on_backward is None:
            recompute_blobs_on_backward = set()
        backward_args = {
            'param': [all_inputs.index(p) for p in params],
            'backward_link_internal': [str(l) for l in backward_link_internal],
            'backward_link_external': [str(l) for l in backward_link_external],
            'backward_link_offset': backward_link_offset,
            'backward_step_net': str(backward_cell_net.Proto()),
            'outputs_with_grads': outputs_with_grads,
            'recompute_blobs_on_backward': [
                str(b) for b in recompute_blobs_on_backward
            ],
            'param_grads': param_grads,
        }

    results = net.RecurrentNetwork(
        all_inputs,
        all_outputs + [s("step_workspaces")],
        alias_src=alias_src,
        alias_dst=[str(a) for a in alias_dst],
        alias_offset=alias_offset,
        recurrent_states=recurrent_states,
        initial_recurrent_state_ids=[
            all_inputs.index(i) for i in recurrent_inputs
        ],
        link_internal=[str(l) for l in link_internal],
        link_external=[str(l) for l in link_external],
        link_offset=link_offset,
        step_net=str(cell_net.Proto()),
        timestep="timestep" if timestep is None else str(timestep),
        **backward_args
    )

    # Restore net type since 'rnn' is not recognized outside RNNs
    cell_net.Proto().type = 'simple'

    # The last output is a list of step workspaces,
    # which is only needed internally for gradient propagation
    return results[:-1]
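
A standalone illustration (plain Python, made-up blob names, no Caffe2 required) of how the forward links built above are split by unpack_triple into the parallel lists handed to the RecurrentNetwork operator:

forward_links = [
    ('hidden_t_prev', 'hidden_states', 0),  # cell input reads the state at t
    ('hidden_t', 'hidden_states', 1),       # cell output writes the state at t + 1
    ('input_t', 'input_sequence', 0),       # per-timestep slice of the input
]

def unpack_triple(x):
    # same helper as in recurrent_net above
    if x:
        a, b, c = zip(*x)
        return a, b, c
    return [], [], []

link_internal, link_external, link_offset = unpack_triple(forward_links)
print(list(link_internal))  # blob names inside the step net
print(list(link_external))  # backing recurrent-state / input blobs
print(list(link_offset))    # time offset of each link
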
Example no. 48
0
    def update(self):
        try:
            with llfuse.lock_released:
                self._updating_lock.acquire()
                if not self.stale():
                    return

                contents = {}
                roots = []
                root_owners = set()
                objects = {}

                methods = self.api._rootDesc.get(
                    'resources')["groups"]['methods']
                if 'httpMethod' in methods.get('shared', {}):
                    page = []
                    while True:
                        resp = self.api.groups().shared(
                            filters=[['group_class', '=', 'project']] + page,
                            order="uuid",
                            limit=10000,
                            count="none",
                            include="owner_uuid").execute()
                        if not resp["items"]:
                            break
                        page = [[
                            "uuid", ">",
                            resp["items"][len(resp["items"]) - 1]["uuid"]
                        ]]
                        for r in resp["items"]:
                            objects[r["uuid"]] = r
                            roots.append(r["uuid"])
                        for r in resp["included"]:
                            objects[r["uuid"]] = r
                            root_owners.add(r["uuid"])
                else:
                    all_projects = arvados.util.list_all(
                        self.api.groups().list,
                        self.num_retries,
                        filters=[['group_class', '=', 'project']],
                        select=["uuid", "owner_uuid"])
                    for ob in all_projects:
                        objects[ob['uuid']] = ob

                    current_uuid = self.current_user['uuid']
                    for ob in all_projects:
                        if ob['owner_uuid'] != current_uuid and ob[
                                'owner_uuid'] not in objects:
                            roots.append(ob['uuid'])
                            root_owners.add(ob['owner_uuid'])

                    lusers = arvados.util.list_all(
                        self.api.users().list,
                        self.num_retries,
                        filters=[['uuid', 'in',
                                  list(root_owners)]])
                    lgroups = arvados.util.list_all(
                        self.api.groups().list,
                        self.num_retries,
                        filters=[['uuid', 'in',
                                  list(root_owners) + roots]])

                    for l in lusers:
                        objects[l["uuid"]] = l
                    for l in lgroups:
                        objects[l["uuid"]] = l

                for r in root_owners:
                    if r in objects:
                        obr = objects[r]
                        if obr.get("name"):
                            contents[obr["name"]] = obr
                        #elif obr.get("username"):
                        #    contents[obr["username"]] = obr
                        elif "first_name" in obr:
                            contents[u"{} {}".format(obr["first_name"],
                                                     obr["last_name"])] = obr

                for r in roots:
                    if r in objects:
                        obr = objects[r]
                        if obr['owner_uuid'] not in objects:
                            contents[obr["name"]] = obr

            # end with llfuse.lock_released, re-acquire lock

            self.merge(
                viewitems(contents), lambda i: i[0],
                lambda a, i: a.uuid() == i[1]['uuid'],
                lambda i: ProjectDirectory(self.inode,
                                           self.inodes,
                                           self.api,
                                           self.num_retries,
                                           i[1],
                                           poll=self._poll,
                                           poll_time=self._poll_time))
        except Exception:
            _logger.exception("arv-mount shared dir error")
        finally:
            self._updating_lock.release()
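
A standalone sketch of the keyset pagination used in the shared() branch above; fetch_page is a hypothetical stand-in for the API call, and each request asks only for rows whose uuid is greater than the last one already seen:

def fetch_all(fetch_page):
    # fetch_page(extra_filters) -> list of dicts that each contain a 'uuid' key
    page = []
    items = []
    while True:
        batch = fetch_page(page)
        if not batch:
            break
        # next request: only uuids strictly greater than the last one seen
        page = [["uuid", ">", batch[-1]["uuid"]]]
        items.extend(batch)
    return items

# toy pages standing in for paginated API responses
pages = [[{'uuid': 'a1'}, {'uuid': 'b2'}], [{'uuid': 'c3'}], []]
print(fetch_all(lambda extra: pages.pop(0)))
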