Example #1
    def indent_code(self, code):
        """Accepts a string of code or a list of code lines"""

        # code mostly copied from ccode
        if isinstance(code, string_types):
            code_lines = self.indent_code(code.splitlines(True))
            return "".join(code_lines)

        tab = "  "
        inc_regex = ("^function ", "^if ", "^elseif ", "^else$", "^for ")
        dec_regex = ("^end$", "^elseif ", "^else$")

        # pre-strip left-space from the code
        code = [line.lstrip(" \t") for line in code]

        increase = [int(any([search(re, line) for re in inc_regex])) for line in code]
        decrease = [int(any([search(re, line) for re in dec_regex])) for line in code]

        pretty = []
        level = 0
        for n, line in enumerate(code):
            if line == "" or line == "\n":
                pretty.append(line)
                continue
            level -= decrease[n]
            pretty.append("%s%s" % (tab * level, line))
            level += increase[n]
        return pretty
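A minimal standalone sketch of the same keyword-driven indentation, outside any printer class; the function name, the two-space tab, and the sample input are assumptions for illustration.

import re

def indent_matlab_like(lines, tab="  "):
    """Indent Octave/MATLAB-style lines based on block-opening and block-closing keywords."""
    inc_regex = ("^function ", "^if ", "^elseif ", "^else$", "^for ")
    dec_regex = ("^end$", "^elseif ", "^else$")
    lines = [line.strip() for line in lines]
    pretty = []
    level = 0
    for line in lines:
        if any(re.search(rx, line) for rx in dec_regex):
            level -= 1                    # close the block before printing this line
        pretty.append(tab * level + line)
        if any(re.search(rx, line) for rx in inc_regex):
            level += 1                    # indent everything after a block opener
    return pretty

print("\n".join(indent_matlab_like(["for i = 1:3", "disp(i)", "end"])))
# for i = 1:3
#   disp(i)
# end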
Example #2
def enum_extend(trace, msg, num_samples=None):
    """
    :param trace: a partial trace
    :param msg: the message at a pyro primitive site
    :param num_samples: maximum number of extended traces to return.
    :returns: a list of traces, copies of input trace with one extra site

    Utility function to copy and extend a trace with sites based on the input site
    whose values are enumerated from the support of the input site's distribution.

    Used for exact inference and integrating out discrete variables.
    """
    if num_samples is None:
        num_samples = -1

    # Batched .enumerate_support() assumes batched values are independent.
    batch_shape = msg["fn"].batch_shape(msg["value"], *msg["args"], **msg["kwargs"])
    is_batched = any(size > 1 for size in batch_shape)
    inside_iarange = any(frame.vectorized for frame in msg["cond_indep_stack"])
    if is_batched and not inside_iarange:
        raise ValueError(
                "Tried to enumerate a batched pyro.sample site '{}' outside of a pyro.iarange. "
                "To fix, either enclose in a pyro.iarange, or avoid batching.".format(msg["name"]))

    extended_traces = []
    for i, s in enumerate(msg["fn"].enumerate_support(*msg["args"], **msg["kwargs"])):
        if i > num_samples and num_samples >= 0:
            break
        msg_copy = msg.copy()
        msg_copy.update(value=s)
        tr_cp = trace.copy()
        tr_cp.add_node(msg["name"], **msg_copy)
        extended_traces.append(tr_cp)
    return extended_traces
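A minimal sketch of the enumeration idea using plain dicts in place of Pyro traces; the site layout and the 'support' key are illustrative assumptions, not Pyro's API.

# hypothetical "site" whose distribution support is just a Python list
site = {"name": "z", "value": None, "support": [0, 1, 2]}
trace = {"z": site}

extended_traces = []
for v in site["support"]:
    site_copy = dict(site, value=v)   # copy the site and fill in one enumerated value
    trace_copy = dict(trace)          # shallow copy of the trace
    trace_copy["z"] = site_copy
    extended_traces.append(trace_copy)

print([t["z"]["value"] for t in extended_traces])  # [0, 1, 2]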
Example #3
  def _map_fetched_remote_source(self, go_remote_lib, gopath, all_known_remote_libs, resolved_remote_libs, undeclared_deps):
    for remote_import_path in self._get_remote_import_paths(go_remote_lib.import_path, gopath=gopath):
      fetcher = self._get_fetcher(remote_import_path)
      remote_root = fetcher.root()
      spec_path = os.path.join(go_remote_lib.target_base, remote_root)

      package_path = GoRemoteLibrary.remote_package_path(remote_root, remote_import_path)
      target_name = package_path or os.path.basename(remote_root)

      address = Address(spec_path, target_name)
      if not any(address == lib.address for lib in all_known_remote_libs):
        try:
          # If we've already resolved a package from this remote root, its ok to define an
          # implicit synthetic remote target for all other packages in the same remote root.
          same_remote_libs = [lib for lib in all_known_remote_libs if spec_path == lib.address.spec_path]
          implicit_ok = any(same_remote_libs)

          # If we're creating a synthetic remote target, we should pin it to the same
          # revision as the rest of the library.
          rev = None
          if implicit_ok:
            rev = same_remote_libs[0].rev

          remote_lib = self._resolve(go_remote_lib, address, package_path, rev, implicit_ok)
          resolved_remote_libs.add(remote_lib)
          all_known_remote_libs.add(remote_lib)
        except self.UndeclaredRemoteLibError as e:
          undeclared_deps[go_remote_lib].add((remote_import_path, e.address))
      self.context.build_graph.inject_dependency(go_remote_lib.address, address)
Example #4
	def canMoveLeft(self):
		if any([j[0] <= 0 for j in self.cur_piece_pos]):
			return False
		elif any([self.grid[j[1]][j[0] - 1] for j in self.cur_piece_pos]):
			return False
		else:
			return True
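The same boundary-plus-collision test as a self-contained sketch; the grid layout (grid[y][x] truthy when occupied) is an assumption based on the indexing above.

# hypothetical 3x3 grid and piece position
grid = [[0, 0, 0],
        [1, 0, 0],
        [0, 0, 0]]
cur_piece_pos = [(1, 0), (1, 1)]  # (x, y) pairs

can_move_left = (not any(x <= 0 for x, y in cur_piece_pos) and
                 not any(grid[y][x - 1] for x, y in cur_piece_pos))
print(can_move_left)  # False: the cell left of (1, 1) is occupied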
    def generate_file_map(self):
        # Read all the files in the given folder.
        # We gather them all and then send them up to GAE.
        # We do this rather than processing the templates locally.
        file_map = dict()
        fdir = os.path.dirname(self.view.file_name()).replace(self.parent_path+'/', '')
        for root, dirs, files in os.walk(self.path):
            for filename in files:
                if any(filename.endswith(postfix) for postfix in ['.tracking', '.html', '.txt', '.yaml', '.js']):
                    contents = read_file(os.path.join(root, filename))
                    file_map['%s/%s' % (fdir, filename)] = contents
                    # file_map[filename] = contents
        for root, dirs, files in os.walk(self.image_path):
            for filename in files:
                image_path = os.path.abspath(os.path.join(root, filename))
                contents = encode_image(image_path)
                file_map[filename] = contents
        for root, dirs, files in os.walk(self.parent_path):
            for filename in files:
                if any(filename.endswith(postfix) for postfix in ['.tracking', '.html', '.txt', '.yaml', '.js']):
                    contents = read_file(os.path.join(root, filename))
                    file_map[filename] = contents
        print(file_map.keys())

        return file_map
Example #6
 def change_engine_state(self,widget):
     checked = widget.get_active()
     name = widget.get_child().get_text()
     if checked:
         if not any(x in name for x in self.engines_list):
             print "activating %s engine" % name
             self.engines_list.append(name)
             self.gui.conf["engines"] = self.engines_list
             self.gui.conf.write()
             self.init_engine(name)
             try:
                 if getattr(self, '%s' % name).adult_content:
                     self.gui.engine_selector.append(name,True)
             except:
                 self.gui.engine_selector.append(name)
             self.gui.engine_selector.setIndexFromString(name)
     else:
         if any(x in name for x in self.engines_list):
             print "deactivating %s engine" % name
             self.engines_list.remove(name)
             self.gui.conf["engines"] = self.engines_list
             self.gui.conf.write()
             self.gui.engine_selector.setIndexFromString(name)
             self.gui.engine_selector.remove(self.gui.engine_selector.getSelectedIndex())
             self.gui.engine_selector.select(0)
    def add_backend(self, port, instance):
        (section_name, section) = self.find_frontend(port)
        if section_name is None:
            return self
        
        backend_name = self.find_backend_name(section)
        if backend_name is None:
            return self

        backend = 'backend %s' % backend_name 
        if backend not in self.__content_map.iterkeys():
            raise Exception('no backend is found with name %s' % backend_name)

        backend_conf = self.__content_map[backend]
        lbcookie_enabled = False
        appcookie_enabled = False
        if any("cookie AWSELB" in s for s in backend_conf):
            lbcookie_enabled = True
        elif any("appsession " in s for s in backend_conf):
            appcookie_enabled = True

        line = 'server %s %s:%d' % (section_name.replace('frontend','').strip(' '), instance['hostname'], instance['port'])
        if lbcookie_enabled or appcookie_enabled:
            line = line + ' cookie %s' % ConfBuilderHaproxy.encode_str(instance['hostname'])
       
        backend_conf.insert(0, line)
        return self
Example #8
    def default_get(self, fields):
        rec = super(account_register_payments, self).default_get(fields)
        context = dict(self._context or {})
        active_model = context.get('active_model')
        active_ids = context.get('active_ids')

        # Checks on context parameters
        if not active_model or not active_ids:
            raise UserError(_("Programmation error: wizard action executed without active_model or active_ids in context."))
        if active_model != 'account.invoice':
            raise UserError(_("Programmation error: the expected model for this action is 'account.invoice'. The provided one is '%d'.") % active_model)

        # Checks on received invoice records
        invoices = self.env[active_model].browse(active_ids)
        if any(invoice.state != 'open' for invoice in invoices):
            raise UserError(_("You can only register payments for open invoices"))
        if any(inv.commercial_partner_id != invoices[0].commercial_partner_id for inv in invoices):
            raise UserError(_("In order to pay multiple invoices at once, they must belong to the same commercial partner."))
        if any(MAP_INVOICE_TYPE_PARTNER_TYPE[inv.type] != MAP_INVOICE_TYPE_PARTNER_TYPE[invoices[0].type] for inv in invoices):
            raise UserError(_("You cannot mix customer invoices and vendor bills in a single payment."))
        if any(inv.currency_id != invoices[0].currency_id for inv in invoices):
            raise UserError(_("In order to pay multiple invoices at once, they must use the same currency."))

        total_amount = sum(inv.residual * MAP_INVOICE_TYPE_PAYMENT_SIGN[inv.type] for inv in invoices)
        rec.update({
            'amount': abs(total_amount),
            'currency_id': invoices[0].currency_id.id,
            'payment_type': total_amount > 0 and 'inbound' or 'outbound',
            'partner_id': invoices[0].commercial_partner_id.id,
            'partner_type': MAP_INVOICE_TYPE_PARTNER_TYPE[invoices[0].type],
        })
        return rec
Example #9
def aggregate_scores(scores, display_name="summary", location=None):
    """
    scores: A list of ScoreBase objects
    display_name: The display name for the score object
    location: The location under which all objects in scores are located
    returns: A tuple (all_total, graded_total).
        all_total: A ScoreBase representing the total score summed over all input scores
        graded_total: A ScoreBase representing the score summed over all graded input scores
    """
    total_correct_graded = float_sum(score.earned for score in scores if score.graded)
    total_possible_graded = float_sum(score.possible for score in scores if score.graded)
    any_attempted_graded = any(score.attempted for score in scores if score.graded)

    total_correct = float_sum(score.earned for score in scores)
    total_possible = float_sum(score.possible for score in scores)
    any_attempted = any(score.attempted for score in scores)

    # regardless of whether it is graded
    all_total = AggregatedScore(total_correct, total_possible, False, display_name, location, any_attempted)

    # selecting only graded things
    graded_total = AggregatedScore(
        total_correct_graded, total_possible_graded, True, display_name, location, any_attempted_graded,
    )

    return all_total, graded_total
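A self-contained sketch of the same aggregation, with a hypothetical Score namedtuple standing in for ScoreBase/AggregatedScore and plain tuples as output.

from collections import namedtuple

Score = namedtuple("Score", "earned possible graded attempted")

scores = [Score(2, 3, True, True), Score(1, 1, False, True), Score(0, 4, True, False)]
graded = [s for s in scores if s.graded]

# (earned, possible, attempted) regardless of grading, then for graded scores only
all_total = (sum(s.earned for s in scores), sum(s.possible for s in scores),
             any(s.attempted for s in scores))
graded_total = (sum(s.earned for s in graded), sum(s.possible for s in graded),
                any(s.attempted for s in graded))
print(all_total)     # (3, 8, True)
print(graded_total)  # (2, 7, True)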
    def hisRead(self,**kwargs):
        """
        This method returns a list of history records
        arguments are : 
        ids : a ID or a list of ID 
        AND_search : a list of keywords to look for in trend names
        OR_search : a list of keywords to look for in trend names
        rng : haystack range (today, yesterday, last24hours, ...)
        start : string representation of start time ex. '2014-01-01T00:00' 
        end : string representation of end time ex. '2014-01-01T00:00'
        """
        self._filteredList = [] # Empty list to be returned
        # Keyword Arguments
        print(kwargs)
        ids = kwargs.pop('id','')
        AND_search = kwargs.pop('AND_search','')
        OR_search = kwargs.pop('OR_search','')
        rng = kwargs.pop('rng','')
        start = kwargs.pop('start','')
        end = kwargs.pop('end','')
        takeall = kwargs.pop('all','')
        # Remaining kwargs...
        if kwargs: raise TypeError('Unknown argument(s) : %s' % kwargs)
        
        # Build datetimeRange based on start and end
        if start and end:
            datetimeRange = start+','+end
        else:
            datetimeRange = rng
        
        
        # Iterate over all histories and keep the matching records
        for eachHistory in self.hisAll():
            takeit = False
            # Find histories matching ALL keywords in AND_search
            if (AND_search != '') and all([keywords in eachHistory['name'] for keywords in AND_search]):
                print('AND_search : Adding %s to recordList' % eachHistory['name'])
                takeit = True

            # Find histories matching ANY keywords in OR_search
            elif (OR_search != '') and any([keywords in eachHistory['name'] for keywords in OR_search]):
                print('OR_search : Adding %s to recordList' % eachHistory['name'])
                takeit = True

            # Find histories matching ANY ID in the ids list
            elif (ids != '') and any([id in eachHistory['id'] for id in ids]):
                print('ID found : Adding %s to recordList' % eachHistory['name'])
                takeit = True
            
            elif takeall != '':
                print('Adding %s to recordList' % eachHistory['name'])
                takeit = True
                
            if takeit:
                self._filteredList.append(HisRecord(self,eachHistory['id'],datetimeRange))
            

        if self._filteredList == []:
            print('No trends found... sorry !')
        
        return self._filteredList
Example #11
def test_good(x):
    """Tests if scalar is infinity, NaN, or None.

    Parameters
    ----------
    x : scalar
        Input to test.

    Returns
    -------
    good : logical
        False if x is inf, NaN, or None; True otherwise."""

    good = False

    #DEBUG
    return True

    if x.ndim==0:

        if x==np.inf or x==-np.inf or x is None or math.isnan(x):
            good = False
        else:
            good = True

    else:
        x0 = x.flatten()
        if any(x0==np.inf) or any(x0==-np.inf) or any(v is None for v in x0) or any(np.isnan(x0)):
            good = False
        else:
            good = True

    return good
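With the DEBUG early return removed, a more compact equivalent can be sketched with numpy; the function name is hypothetical and the None handling is an assumption about the intended behavior.

import numpy as np

def test_good_sketch(x):
    """False if x contains inf, NaN, or None; True otherwise."""
    values = np.asarray(x, dtype=object).ravel()
    if any(v is None for v in values):
        return False
    return bool(np.isfinite(np.asarray(values, dtype=float)).all())

print(test_good_sketch(np.array([1.0, 2.0])))     # True
print(test_good_sketch(np.array([1.0, np.inf])))  # False
print(test_good_sketch(None))                     # False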
Example #12
    def _rec(self, obj, state):
        """
        EXAMPLES::

            sage: from sage.combinat.sf.ns_macdonald import NonattackingBacktracker
            sage: n = NonattackingBacktracker(LatticeDiagram([0,1,2]))
            sage: len(list(n))
            12
            sage: obj = [ [], [None], [None, None]]
            sage: state = 2, 1
            sage: list(n._rec(obj, state))
            [([[], [1], [None, None]], (3, 1), False),
             ([[], [2], [None, None]], (3, 1), False)]
        """
        #We need to set the i,j^th entry.
        i, j = state

        #Get the next state
        new_state = self.get_next_pos(i, j)
        yld = True if new_state is None else False

        for k in range(1, len(self._shape)+1):
            #We check to make sure that k does not
            #violate any of the attacking conditions
            if j==1 and any( self.pi(x)==k for x in range(i+1, len(self._shape)+1)):
                continue
            if any( obj[ii-1][jj-1] == k for ii, jj in
                    self._shape.boxes_same_and_lower_right(i, j) if jj != 0):
                continue

            #Fill in the i,j box with k
            obj[i-1][j-1] = k

            #Yield the object
            yield copy.deepcopy(obj), new_state, yld
Example #13
def name_lookup(c, fields):
    def join_fields(fields, want):
        return ' '.join(v for k, v in fields if k in want)
    if not any(k == 'd' for k, v in fields):
        return []
    ab = [v for k, v in fields if k in 'ab']
    name = ' '.join(ab)
    flipped = flip_name(name)
    names = set([name, flipped])
    #names = set([flipped])
    if any(k == 'c' for k, v in fields):
        name = join_fields(fields, 'abc')
        names.update([name, flip_name(name)])
        title = [v for k, v in fields if k in 'c']
        names.update([' '.join(title + ab), ' '.join(title + [flipped])])
        title = ' '.join(title)
        names.update(["%s (%s)" % (name, title), "%s (%s)" % (flipped, title)])
        sp = title.find(' ')
        if sp != -1:
            m = re_title_of.search(title)
            if m:
                role, of_place = m.groups()
                names.update([' '.join(ab + [of_place]), ' '.join([flipped, of_place])])
                names.update([' '.join([role] + ab + [of_place]), ' '.join([role, flipped, of_place])])

            t = title[:sp]
            names.update([' '.join([t] + ab), ' '.join([t, flipped])])

    found = []
    for n in set(re_comma.sub(' ', n) for n in names):
        c.execute("select title, cats, name, persondata from names, people where people.id = names.person_id and name=%s", (n,))
        found += c.fetchall()
    return found
Example #14
 def _get_indicators(self, prototype=None, unwrap=True):
     from abjad.tools import indicatortools
     prototype = prototype or (object,)
     if not isinstance(prototype, tuple):
         prototype = (prototype,)
     prototype_objects, prototype_classes = [], []
     for indicator_prototype in prototype:
         if isinstance(indicator_prototype, type):
             prototype_classes.append(indicator_prototype)
         else:
             prototype_objects.append(indicator_prototype)
     prototype_objects = tuple(prototype_objects)
     prototype_classes = tuple(prototype_classes)
     matching_indicators = []
     for indicator in self._indicator_expressions:
         if isinstance(indicator, prototype_classes):
             matching_indicators.append(indicator)
         elif any(indicator == x for x in prototype_objects):
             matching_indicators.append(indicator)
         elif isinstance(indicator, indicatortools.IndicatorExpression):
             if isinstance(indicator.indicator, prototype_classes):
                 matching_indicators.append(indicator)
             elif any(indicator.indicator == x for x in prototype_objects):
                 matching_indicators.append(indicator)
     if unwrap:
         matching_indicators = [x.indicator for x in matching_indicators]
     matching_indicators = tuple(matching_indicators)
     return matching_indicators
Example #15
def _check_for_problem_somatic_batches(items, config):
    """Identify problem batch setups for somatic calling.

    We do not support multiple tumors in a single batch and VarDict(Java) does not
    handle pooled calling, only tumor/normal.
    """
    to_check = []
    for data in items:
        data = copy.deepcopy(data)
        data["config"] = config_utils.update_w_custom(config, data)
        to_check.append(data)
    data_by_batches = collections.defaultdict(list)
    for data in to_check:
        batches = dd.get_batches(data)
        if batches:
            for batch in batches:
                data_by_batches[batch].append(data)
    for batch, items in data_by_batches.items():
        if vcfutils.get_paired(items):
            vcfutils.check_paired_problems(items)
        elif len(items) > 1:
            vcs = list(set(tz.concat([dd.get_variantcaller(data) or [] for data in items])))
            if any(x.lower().startswith("vardict") for x in vcs):
                raise ValueError("VarDict does not support pooled non-tumor/normal calling, in batch %s: %s"
                                 % (batch, [dd.get_sample_name(data) for data in items]))
            elif any(x.lower() == "mutect" for x in vcs):
                raise ValueError("Mutect requires a 'phenotype: tumor' sample for calling, in batch %s: %s"
                                 % (batch, [dd.get_sample_name(data) for data in items]))
Example #16
def _file_configs_paths(osname, agentConfig):
    """ Retrieve all the file configs and return their paths
    """
    try:
        confd_path = get_confd_path(osname)
        all_file_configs = glob.glob(os.path.join(confd_path, '*.yaml'))
        all_default_configs = glob.glob(os.path.join(confd_path, '*.yaml.default'))
    except PathNotFound as e:
        log.error("No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n" % e.args[0])
        sys.exit(3)

    if all_default_configs:
        current_configs = set([_conf_path_to_check_name(conf) for conf in all_file_configs])
        for default_config in all_default_configs:
            if not _conf_path_to_check_name(default_config) in current_configs:
                all_file_configs.append(default_config)

    # Compatibility code for the Nagios checks if it's still configured
    # in datadog.conf
    # FIXME: 6.x, should be removed
    if not any('nagios' in config for config in itertools.chain(*all_file_configs)):
        # check if it's configured in datadog.conf the old way
        if any([nagios_key in agentConfig for nagios_key in NAGIOS_OLD_CONF_KEYS]):
            all_file_configs.append('deprecated/nagios')

    return all_file_configs
def test_request_xml_dict_params():
    xml = adapters.xml_request(
        'client.get',
        id="5",
        monkey=dict(name="butter")
    )
    
    # test that xml looks roughly like either
    # <request method="client.get">
    #     <id>5</id>
    #     <monkey><name>butter</name></monkey>
    # </request>
    # or
    # <request method="client.get">
    #     <monkey><name>butter</name></monkey>
    #     <id>5</id>
    # </request>
    #
    # (We don't actually care which.)
    request_document = etree.fromstring(xml)
    assert 'request' == request_document.tag
    assert {'method': 'client.get'} == request_document.attrib
    assert 2 == len(request_document)
    assert any(
        parameter.tag == 'id' and parameter.text == '5'
        for parameter in request_document
    )
    assert any(
        parameter.tag == 'monkey' 
        and len(parameter) == 1
        and parameter[0].tag == 'name'
        and parameter[0].text == 'butter'
        for parameter in request_document
    )
Example #18
def assess_ship_design_role(design):
    parts = [fo.getPartType(partname) for partname in design.parts if partname and fo.getPartType(partname)]

    if any(p.partClass == fo.shipPartClass.colony and p.capacity == 0 for p in parts):
        if design.speed > 0:
            return ShipRoleType.CIVILIAN_OUTPOST
        else:
            return ShipRoleType.BASE_OUTPOST

    if any(p.partClass == fo.shipPartClass.colony and p.capacity > 0 for p in parts):
        if design.speed > 0:
            return ShipRoleType.CIVILIAN_COLONISATION
        else:
            return ShipRoleType.BASE_COLONISATION

    if any(p.partClass == fo.shipPartClass.troops for p in parts):
        if design.speed > 0:
            return ShipRoleType.MILITARY_INVASION
        else:
            return ShipRoleType.BASE_INVASION

    if design.speed == 0:
        if not parts or parts[0].partClass == fo.shipPartClass.shields:  # ToDo: Update logic for new ship designs
            return ShipRoleType.BASE_DEFENSE
        else:
            return ShipRoleType.INVALID

    stats = foAI.foAIstate.get_design_id_stats(design.id)
    rating = stats['attack'] * (stats['structure'] + stats['shields'])
    if rating > 0:  # positive attack stat
        return ShipRoleType.MILITARY
    if any(p.partClass == fo.shipPartClass.detection for p in parts):
        return ShipRoleType.CIVILIAN_EXPLORATION
    else:   # if no suitable role found, use as (bad) scout as it still has inherent detection
        return ShipRoleType.CIVILIAN_EXPLORATION
Example #19
def get_gtr(codons):
    """
    This is a generalization of get_ts_tv_exch.
    It returns a higher dimensional ndarray
    whose shape is (ncodons, ncodons, 6) where the dimension of the last
    axis is the number of upper off-diagonal entries in a nucleotide
    rate matrix, that is, 4*(4-1)/2 = 6.
    The value of M[i, j, k] is 1 if codons i and j differ at exactly
    one nucleotide position and k is the type of the unordered difference,
    otherwise M[i, j, k] is 0.
    This is a very sparse and wasteful representation,
    but it is nice for vectorization.
    @param codons: sequence of lower case codon strings
    @return: a numpy array of ndim 3
    """
    ncodons = len(codons)
    ham = get_hamming(codons)
    M = numpy.zeros((ncodons, ncodons, 6), dtype=int)
    for i, ci in enumerate(codons):
        for j, cj in enumerate(codons):
            if ham[i, j] == 1:
                for k, pk in enumerate(g_nuc_gtr):
                    if any(ci[a]+cj[a]==pk for a in range(3)):
                        M[i, j, k] = 1
                    if any(cj[a]+ci[a]==pk for a in range(3)):
                        M[i, j, k] = 1
    return M
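get_hamming and g_nuc_gtr are not shown here; a plausible sketch of both, assuming g_nuc_gtr is the tuple of six unordered nucleotide pairs and get_hamming returns pairwise codon Hamming distances.

import numpy

# assumed: the 6 unordered nucleotide pairs (upper off-diagonal of a 4x4 nucleotide matrix)
g_nuc_gtr = ('ac', 'ag', 'at', 'cg', 'ct', 'gt')

def get_hamming(codons):
    """Pairwise Hamming distances between equal-length codon strings (assumed helper)."""
    n = len(codons)
    ham = numpy.zeros((n, n), dtype=int)
    for i, ci in enumerate(codons):
        for j, cj in enumerate(codons):
            ham[i, j] = sum(a != b for a, b in zip(ci, cj))
    return ham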
Example #20
    def _select(self, event):
        """This is basically a proxy to trigger a pick event.  This function is
        connected to either a mouse motion or mouse button event (see
        "self.enable") depending on "self.hover". If we're over a point, it
        fires a pick event.

        This probably seems bizarre, but it's required for hover mode (no mouse
        click) and otherwise it's a workaround for picking artists in twinned
        or overlapping axes.

        Even if we're not in hover mode, pick events won't work properly for
        twinned axes.  Therefore, we manually go through all artists managed by
        this datacursor and fire a pick event if the mouse is over an a managed
        artist."""
        for artist in self.artists:
            # We need to redefine event.xdata and event.ydata for twinned axes
            # to work correctly
            point = event.x, event.y
            x, y = artist.axes.transData.inverted().transform_point(point)
            event = copy.copy(event)
            event.xdata, event.ydata = x, y
            artist.pick(event)

        from itertools import chain
        all_artists = chain(self.artists, self.annotations.values())
        over_something = [x.contains(event)[0] for x in all_artists]

        if any(self.timer_expired.values()) and not self.draggable:
            # Not hovering over anything...
            if not any(over_something) and self.hover:
                self.hide()
Example #21
    def _fields_sync(self, cr, uid, partner, update_values, context=None):
        """ Sync commercial fields and address fields from company and to children after create/update,
        just as if those were all modeled as fields.related to the parent """
        # 1. From UPSTREAM: sync from parent
        if update_values.get('parent_id') or update_values.get('type', 'contact'):  # TDE/ fp change to check, get default value not sure
            # 1a. Commercial fields: sync if parent changed
            if update_values.get('parent_id'):
                self._commercial_sync_from_company(cr, uid, partner, context=context)
            # 1b. Address fields: sync if parent or use_parent changed *and* both are now set 
            if partner.parent_id and partner.type == 'contact':
                onchange_vals = self.onchange_parent_id(cr, uid, [partner.id],
                                                        parent_id=partner.parent_id.id,
                                                        context=context).get('value', {})
                partner.update_address(onchange_vals)

        # 2. To DOWNSTREAM: sync children
        if partner.child_ids:
            # 2a. Commercial Fields: sync if commercial entity
            if partner.commercial_partner_id == partner:
                commercial_fields = self._commercial_fields(cr, uid,
                                                            context=context)
                if any(field in update_values for field in commercial_fields):
                    self._commercial_sync_to_children(cr, uid, partner,
                                                      context=context)
            # 2b. Address fields: sync if address changed
            address_fields = self._address_fields(cr, uid, context=context)
            if any(field in update_values for field in address_fields):
                domain_children = [('parent_id', '=', partner.id), ('type', '=', 'contact')]
                update_ids = self.search(cr, uid, domain_children, context=context)
                self.update_address(cr, uid, update_ids, update_values, context=context)
Example #22
def get_exch_ts_tv(codons):
    """
    This is a more sophisticated version of get_ts_tv.
    Or alternatively it is a more restricted version of get_gtr.
    It returns an ndim-3 matrix whose shape is (ncodons, ncodons, 2)
    where the third axis specifies transitions and transversions.
    The name exch refers to exchangeability, because this function
    precomputes an ndarray that is used as a component to help build
    the part of the rate matrix that corresponds
    to the nucleotide exchangeability (as opposed to overall rate,
    or nucleotide equilibrium probabilities,
    or mutation-selection codon exchangeability) in the codon rate matrix.
    @param codons: sequence of lower case codon strings
    @return: a numpy array of ndim 3
    """
    ncodons = len(codons)
    ham = get_hamming(codons)
    M = numpy.zeros((ncodons, ncodons, 2), dtype=int)
    for i, ci in enumerate(codons):
        for j, cj in enumerate(codons):
            if ham[i, j] == 1:
                if any(a+b in g_ts for a,b in zip(ci,cj)):
                    M[i, j, 0] = 1
                if any(a+b in g_tv for a,b in zip(ci,cj)):
                    M[i, j, 1] = 1
    return M
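Similarly, g_ts and g_tv are not shown; a sketch under the assumption that they hold the ordered nucleotide pairs classed as transitions (purine-purine, pyrimidine-pyrimidine) and transversions.

# assumed definitions of the transition and transversion pair sets
g_ts = ('ag', 'ga', 'ct', 'tc')
g_tv = ('ac', 'ca', 'at', 'ta', 'cg', 'gc', 'gt', 'tg')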
Example #23
    def test_ignore(self):
        n = self.pathod("304")
        self._ignore_on()
        i = self.pathod("305")
        i2 = self.pathod("306")
        self._ignore_off()

        self.master.masterq.join()

        assert n.status_code == 304
        assert i.status_code == 305
        assert i2.status_code == 306
        assert any(f.response.status_code == 304 for f in self.master.state.flows)
        assert not any(f.response.status_code == 305 for f in self.master.state.flows)
        assert not any(f.response.status_code == 306 for f in self.master.state.flows)

        # Test that we get the original SSL cert
        if self.ssl:
            i_cert = SSLCert(i.sslinfo.certchain[0])
            i2_cert = SSLCert(i2.sslinfo.certchain[0])
            n_cert = SSLCert(n.sslinfo.certchain[0])

            assert i_cert == i2_cert
            assert i_cert != n_cert

        # Test Non-HTTP traffic
        spec = "200:i0,@100:d0"  # this results in just 100 random bytes
        # mitmproxy responds with bad gateway
        assert self.pathod(spec).status_code == 502
        self._ignore_on()
        with raises(HttpException):
            self.pathod(spec)  # pathoc tries to parse answer as HTTP

        self._ignore_off()
Example #24
def checkPlanetSplitter (city="nyc"):
    # Run planetsplitter if .mem files don't exist for city. Also unzips OSM
    # file if still in .bz2 format
    files = os.listdir (".") # from /src
    if city.lower ()[0] == "l":
        city = "london"
        prfx = "lo"
    else:
        city = "nyc"
        prfx = "ny"
    # First unzip
    datadir = "../data/"
    dfiles = os.listdir (datadir)
    fcheck = any (f.find (city) > -1 and f.endswith(".osm") for f in dfiles)
    if not fcheck:
        bf = [f for f in dfiles if f.find (city) > -1 and f.endswith (".bz2")]
        if not bf:
            print "ERROR: %s.bz2 file does not exist to unzip" % bf 
            # TODO: exception handler
        else:
            bf = datadir + bf [0]
            args = ["bunzip2", bf]
            print "Unzipping planet-%s.osm ... " % city
            subprocess.Popen (args)
    if not any (f.startswith(prfx) and f.endswith(".mem") for f in files):
        planetfile = datadir + "planet-" + city + ".osm"
        args = ["/Users/colinbroderick/Downloads/routino-2.7.2/src/planetsplitter", "--prefix=" + prfx,\
                "--tagging=/Users/colinbroderick/Downloads/routino-2.7.2/xml/routino-tagging.xml",\
                planetfile]
        print "planet-%s.osm not yet split. Running planetsplitter..." % city
        subprocess.Popen (args)
    else:
        print "%s already split" % city
    def checkRequirementsMatch(self, subPanelName):
        # Read requirements for the specified subpanel form the XML config file
        xmlRequirement = "./Subpanels/Subpanel/[@Name='" + subPanelName +"']/Requirement"
        subPanelRequirements = xml.findall(xmlRequirement)
        
        panelRequirements = {}
        booleanOperation = {}      
        for requirements in subPanelRequirements:
            requirement = requirements.text.split(':')
            if requirement[0] == "All": # Need element 1 populated if "All" detected
                requirement.append("All")
            panelRequirements[requirement[0]] = requirement[1].strip()
            booleanOperation[requirement[0]] = requirements.get("type")

        # Go through each subpanel requirement and check against board configuration
        # If no boolean type defined, assume AND
        requirementType = panelRequirements.keys()
        # If no Requirement found, assume ALL
        try:
            if (requirementType[0] == "All"):
                check = True
            else:
                check = any(panelRequirements[requirementType[0]] in s for s in self.boardConfiguration.values())
                for testRequirement in requirementType[1:]:
                    if (booleanOperation[testRequirement] == "or") or (booleanOperation[testRequirement] == "OR"):
                        check = check or any(panelRequirements[testRequirement] in s for s in self.boardConfiguration.values())
                    else:
                        check = check and any(panelRequirements[testRequirement] in s for s in self.boardConfiguration.values())
        except:
            check = True
        return check
def googleplus(url):
    try:
        result = getUrl(url).result
        u = re.compile('"(http.+?videoplayback[?].+?)"').findall(result)
        if len(u) == 0:
            result = getUrl(url, mobile=True).result
            u = re.compile('"(http.+?videoplayback[?].+?)"').findall(result)

        u = [i.replace('\\u003d','=').replace('\\u0026','&') for i in u]

        d = []
        try: d += [[{'quality': '1080p', 'url': i} for i in u if any(x in i for x in ['&itag=37&', '&itag=137&', '&itag=299&', '&itag=96&', '&itag=248&', '&itag=303&', '&itag=46&'])][0]]
        except: pass
        try: d += [[{'quality': 'HD', 'url': i} for i in u if any(x in i for x in ['&itag=22&', '&itag=84&', '&itag=136&', '&itag=298&', '&itag=120&', '&itag=95&', '&itag=247&', '&itag=302&', '&itag=45&', '&itag=102&'])][0]]
        except: pass

        url = []
        for i in d:
            try: url.append({'quality': i['quality'], 'url': getUrl(i['url'], output='geturl').result})
            except: pass

        if url == []: return
        return url
    except:
        return
Example #27
def filter_file(file_name):
    if any(file_name.startswith(ignored_file) for ignored_file in ignored_files):
        return False
    base_name = os.path.basename(file_name)
    if any(fnmatch.fnmatch(base_name, pattern) for pattern in file_patterns_to_ignore):
        return False
    return True
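A self-contained sketch of the same filtering; ignored_files and file_patterns_to_ignore are hypothetical stand-ins for the globals the function reads.

import fnmatch
import os

ignored_files = ("build/", "third_party/")        # hypothetical path prefixes
file_patterns_to_ignore = ("*.min.js", "*.pyc")   # hypothetical glob patterns

def filter_file_sketch(file_name):
    if any(file_name.startswith(prefix) for prefix in ignored_files):
        return False
    base_name = os.path.basename(file_name)
    if any(fnmatch.fnmatch(base_name, pattern) for pattern in file_patterns_to_ignore):
        return False
    return True

print(filter_file_sketch("src/app.py"))         # True
print(filter_file_sketch("build/app.py"))       # False (ignored prefix)
print(filter_file_sketch("src/vendor.min.js"))  # False (ignored pattern)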
Example #28
def apply_filter_include_exclude(
        filename, include_filters, exclude_filters):
    """Apply inclusion/exclusion filters to filename

    The include_filters are tested against
    the given (relative) filename.
    The exclude_filters are tested against
    the stripped, given (relative), and absolute filenames.

    filename (str): the file path to match, should be relative
    include_filters (list of regex): ANY of these filters must match
    exclude_filters (list of regex): NONE of these filters must match

    returns: (filtered, excluded)
        filtered (bool): True when filename failed the include_filter
        excluded (bool): True when filename failed the exclude_filters
    """

    filtered = not any(f.match(filename) for f in include_filters)
    excluded = False

    if filtered:
        return filtered, excluded

    excluded = any(f.match(filename) for f in exclude_filters)

    return filtered, excluded
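A self-contained usage sketch that mirrors the two any() checks above; the patterns and file names are made up.

import re

include_filters = [re.compile(r".*\.cpp$"), re.compile(r".*\.h$")]
exclude_filters = [re.compile(r".*/tests/.*")]

for name in ("src/a.cpp", "src/tests/b.cpp", "README.md"):
    filtered = not any(f.match(name) for f in include_filters)
    excluded = (not filtered) and any(f.match(name) for f in exclude_filters)
    print(name, filtered, excluded)
# src/a.cpp False False       -> kept
# src/tests/b.cpp False True  -> excluded
# README.md True False        -> fails the include filters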
Example #29
def get_squeeze_dims(xarray_obj,
                     dim: Union[Hashable, Iterable[Hashable], None] = None,
                     axis: Union[int, Iterable[int], None] = None
                     ) -> List[Hashable]:
    """Get a list of dimensions to squeeze out.
    """
    if dim is not None and axis is not None:
        raise ValueError('cannot use both parameters `axis` and `dim`')
    if dim is None and axis is None:
        return [d for d, s in xarray_obj.sizes.items() if s == 1]

    if isinstance(dim, Iterable) and not isinstance(dim, str):
        dim = list(dim)
    elif dim is not None:
        dim = [dim]
    else:
        assert axis is not None
        if isinstance(axis, int):
            axis = [axis]
        axis = list(axis)
        if any(not isinstance(a, int) for a in axis):
            raise TypeError(
                'parameter `axis` must be int or iterable of int.')
        alldims = list(xarray_obj.sizes.keys())
        dim = [alldims[a] for a in axis]

    if any(xarray_obj.sizes[k] > 1 for k in dim):
        raise ValueError('cannot select a dimension to squeeze out '
                         'which has length greater than one')
    return dim
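This helper selects the dimensions behind xarray's squeeze(); a short usage sketch of that behavior, assuming numpy and xarray are available.

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((1, 3, 1)), dims=("x", "y", "z"))
print(da.squeeze().dims)         # ('y',)      -- every length-1 dimension dropped
print(da.squeeze(dim="x").dims)  # ('y', 'z')  -- only the named dimension dropped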
Example #30
    def test_tcp(self):
        n = self.pathod("304")
        self._tcpproxy_on()
        i = self.pathod("305")
        i2 = self.pathod("306")
        self._tcpproxy_off()

        self.master.masterq.join()

        assert n.status_code == 304
        assert i.status_code == 305
        assert i2.status_code == 306
        assert any(f.response.status_code == 304 for f in self.master.state.flows)
        assert not any(f.response.status_code == 305 for f in self.master.state.flows)
        assert not any(f.response.status_code == 306 for f in self.master.state.flows)

        # Test that we get the original SSL cert
        if self.ssl:
            i_cert = SSLCert(i.sslinfo.certchain[0])
            i2_cert = SSLCert(i2.sslinfo.certchain[0])
            n_cert = SSLCert(n.sslinfo.certchain[0])

            assert i_cert == i2_cert == n_cert

        # Make sure that TCP messages are in the event log.
        assert any("305" in m for m in self.master.log)
        assert any("306" in m for m in self.master.log)
Example #31
 def is_published(self):
     return any([x.is_public for x in self.external_records.all()])
Example #32
def convex_hull(*args, **kwargs):
    """The convex hull surrounding the Points contained in the list of entities.

    Parameters
    ==========

    args : a collection of Points, Segments and/or Polygons

    Returns
    =======

    convex_hull : Polygon if ``polygon`` is True else as a tuple `(U, L)` where ``L`` and ``U`` are the lower and upper hulls, respectively.

    Notes
    =====

    This can only be performed on a set of points whose coordinates can
    be ordered on the number line.

    References
    ==========

    [1] https://en.wikipedia.org/wiki/Graham_scan

    [2] Andrew's Monotone Chain Algorithm
    (A.M. Andrew,
    "Another Efficient Algorithm for Convex Hulls in Two Dimensions", 1979)
    http://geomalgorithms.com/a10-_hull-1.html

    See Also
    ========

    sympy.geometry.point.Point, sympy.geometry.polygon.Polygon

    Examples
    ========

    >>> from sympy.geometry import Point, convex_hull
    >>> points = [(1, 1), (1, 2), (3, 1), (-5, 2), (15, 4)]
    >>> convex_hull(*points)
    Polygon(Point2D(-5, 2), Point2D(1, 1), Point2D(3, 1), Point2D(15, 4))
    >>> convex_hull(*points, **dict(polygon=False))
    ([Point2D(-5, 2), Point2D(15, 4)],
     [Point2D(-5, 2), Point2D(1, 1), Point2D(3, 1), Point2D(15, 4)])

    """
    from .entity import GeometryEntity
    from .point import Point
    from .line import Segment
    from .polygon import Polygon

    polygon = kwargs.get('polygon', True)
    p = OrderedSet()
    for e in args:
        if not isinstance(e, GeometryEntity):
            try:
                e = Point(e)
            except NotImplementedError:
                raise ValueError('%s is not a GeometryEntity and cannot be made into Point' % str(e))
        if isinstance(e, Point):
            p.add(e)
        elif isinstance(e, Segment):
            p.update(e.points)
        elif isinstance(e, Polygon):
            p.update(e.vertices)
        else:
            raise NotImplementedError(
                'Convex hull for %s not implemented.' % type(e))

    # make sure all our points are of the same dimension
    if any(len(x) != 2 for x in p):
        raise ValueError('Can only compute the convex hull in two dimensions')

    p = list(p)
    if len(p) == 1:
        return p[0] if polygon else (p[0], None)
    elif len(p) == 2:
        s = Segment(p[0], p[1])
        return s if polygon else (s, None)

    def _orientation(p, q, r):
        '''Return positive if p-q-r are clockwise, neg if ccw, zero if
        collinear.'''
        return (q.y - p.y)*(r.x - p.x) - (q.x - p.x)*(r.y - p.y)

    # scan to find upper and lower convex hulls of a set of 2d points.
    U = []
    L = []
    try:
        p.sort(key=lambda x: x.args)
    except TypeError:
        raise ValueError("The points could not be sorted.")
    for p_i in p:
        while len(U) > 1 and _orientation(U[-2], U[-1], p_i) <= 0:
            U.pop()
        while len(L) > 1 and _orientation(L[-2], L[-1], p_i) >= 0:
            L.pop()
        U.append(p_i)
        L.append(p_i)
    U.reverse()
    convexHull = tuple(L + U[1:-1])

    if len(convexHull) == 2:
        s = Segment(convexHull[0], convexHull[1])
        return s if polygon else (s, None)
    if polygon:
        return Polygon(*convexHull)
    else:
        U.reverse()
        return (U, L)
Example #33
    def getUsers(self):
        infoMsg = "fetching database users"
        logger.info(infoMsg)

        rootQuery = queries[Backend.getIdentifiedDbms()].users

        condition = (Backend.isDbms(DBMS.MSSQL) and Backend.isVersionWithin(
            ("2005", "2008")))
        condition |= (Backend.isDbms(DBMS.MYSQL)
                      and not kb.data.has_information_schema)

        if any(
                isTechniqueAvailable(_)
                for _ in (PAYLOAD.TECHNIQUE.UNION, PAYLOAD.TECHNIQUE.ERROR,
                          PAYLOAD.TECHNIQUE.QUERY)) or conf.direct:
            if condition:
                query = rootQuery.inband.query2
            else:
                query = rootQuery.inband.query
            values = inject.getValue(query, blind=False, time=False)

            if not isNoneValue(values):
                kb.data.cachedUsers = []
                for value in arrayizeValue(values):
                    value = unArrayizeValue(value)
                    if not isNoneValue(value):
                        kb.data.cachedUsers.append(value)

        if not kb.data.cachedUsers and isInferenceAvailable(
        ) and not conf.direct:
            infoMsg = "fetching number of database users"
            logger.info(infoMsg)

            if condition:
                query = rootQuery.blind.count2
            else:
                query = rootQuery.blind.count

            count = inject.getValue(query,
                                    union=False,
                                    error=False,
                                    expected=EXPECTED.INT,
                                    charsetType=CHARSET_TYPE.DIGITS)

            if count == 0:
                return kb.data.cachedUsers
            elif not isNumPosStrValue(count):
                errMsg = "unable to retrieve the number of database users"
                raise SqlmapNoneDataException(errMsg)

            plusOne = Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2)
            indexRange = getLimitRange(count, plusOne=plusOne)

            for index in indexRange:
                if Backend.getIdentifiedDbms() in (DBMS.SYBASE, DBMS.MAXDB):
                    query = rootQuery.blind.query % (kb.data.cachedUsers[-1]
                                                     if kb.data.cachedUsers
                                                     else " ")
                elif condition:
                    query = rootQuery.blind.query2 % index
                else:
                    query = rootQuery.blind.query % index
                user = unArrayizeValue(
                    inject.getValue(query, union=False, error=False))

                if user:
                    kb.data.cachedUsers.append(user)

        if not kb.data.cachedUsers:
            errMsg = "unable to retrieve the database users"
            logger.error(errMsg)

        return kb.data.cachedUsers
Example #34
    def getPrivileges(self, query2=False):
        infoMsg = "fetching database users privileges"

        rootQuery = queries[Backend.getIdentifiedDbms()].privileges

        if conf.user == CURRENT_USER:
            infoMsg += " for current user"
            conf.user = self.getCurrentUser()

        logger.info(infoMsg)

        if conf.user and Backend.getIdentifiedDbms() in (DBMS.ORACLE,
                                                         DBMS.DB2):
            conf.user = conf.user.upper()

        if conf.user:
            users = conf.user.split(',')

            if Backend.isDbms(DBMS.MYSQL):
                for user in users:
                    parsedUser = re.search(r"['\"]?(.*?)['\"]?\@", user)

                    if parsedUser:
                        users[users.index(user)] = parsedUser.groups()[0]
        else:
            users = []

        users = [_ for _ in users if _]

        # Set containing the list of DBMS administrators
        areAdmins = set()

        if not kb.data.cachedUsersPrivileges and any(
                isTechniqueAvailable(_)
                for _ in (PAYLOAD.TECHNIQUE.UNION, PAYLOAD.TECHNIQUE.ERROR,
                          PAYLOAD.TECHNIQUE.QUERY)) or conf.direct:
            if Backend.isDbms(
                    DBMS.MYSQL) and not kb.data.has_information_schema:
                query = rootQuery.inband.query2
                condition = rootQuery.inband.condition2
            elif Backend.isDbms(DBMS.ORACLE) and query2:
                query = rootQuery.inband.query2
                condition = rootQuery.inband.condition2
            else:
                query = rootQuery.inband.query
                condition = rootQuery.inband.condition

            if conf.user:
                query += " WHERE "

                if Backend.isDbms(
                        DBMS.MYSQL) and kb.data.has_information_schema:
                    query += " OR ".join("%s LIKE '%%%s%%'" % (condition, user)
                                         for user in sorted(users))
                else:
                    query += " OR ".join("%s = '%s'" % (condition, user)
                                         for user in sorted(users))

            values = inject.getValue(query, blind=False, time=False)

            if not values and Backend.isDbms(DBMS.ORACLE) and not query2:
                infoMsg = "trying with table 'USER_SYS_PRIVS'"
                logger.info(infoMsg)

                return self.getPrivileges(query2=True)

            if not isNoneValue(values):
                for value in values:
                    user = None
                    privileges = set()

                    for count in xrange(0, len(value or [])):
                        # The first column is always the username
                        if count == 0:
                            user = value[count]

                        # The other columns are the privileges
                        else:
                            privilege = value[count]

                            if privilege is None:
                                continue

                            # In PostgreSQL we get 1 if the privilege is
                            # True, 0 otherwise
                            if Backend.isDbms(DBMS.PGSQL) and getUnicode(
                                    privilege).isdigit():
                                if int(privilege) == 1:
                                    privileges.add(PGSQL_PRIVS[count])

                            # In MySQL >= 5.0 and Oracle we get the list
                            # of privileges as string
                            elif Backend.isDbms(DBMS.ORACLE) or (
                                    Backend.isDbms(DBMS.MYSQL)
                                    and kb.data.has_information_schema):
                                privileges.add(privilege)

                            # In MySQL < 5.0 we get Y if the privilege is
                            # True, N otherwise
                            elif Backend.isDbms(
                                    DBMS.MYSQL
                            ) and not kb.data.has_information_schema:
                                if privilege.upper() == 'Y':
                                    privileges.add(MYSQL_PRIVS[count])

                            # In Firebird we get one letter for each privilege
                            elif Backend.isDbms(DBMS.FIREBIRD):
                                if privilege.strip() in FIREBIRD_PRIVS:
                                    privileges.add(
                                        FIREBIRD_PRIVS[privilege.strip()])

                            # In DB2 we get Y or G if the privilege is
                            # True, N otherwise
                            elif Backend.isDbms(DBMS.DB2):
                                privs = privilege.split(',')
                                privilege = privs[0]
                                if len(privs) > 1:
                                    privs = privs[1]
                                    privs = list(privs.strip())
                                    i = 1

                                    for priv in privs:
                                        if priv.upper() in ('Y', 'G'):
                                            for position, db2Priv in DB2_PRIVS.items(
                                            ):
                                                if position == i:
                                                    privilege += ", " + db2Priv

                                        i += 1

                                privileges.add(privilege)

                    if user in kb.data.cachedUsersPrivileges:
                        kb.data.cachedUsersPrivileges[user] = list(
                            privileges.union(
                                kb.data.cachedUsersPrivileges[user]))
                    else:
                        kb.data.cachedUsersPrivileges[user] = list(privileges)

        if not kb.data.cachedUsersPrivileges and isInferenceAvailable(
        ) and not conf.direct:
            if Backend.isDbms(DBMS.MYSQL) and kb.data.has_information_schema:
                conditionChar = "LIKE"
            else:
                conditionChar = "="

            if not len(users):
                users = self.getUsers()

                if Backend.isDbms(DBMS.MYSQL):
                    for user in users:
                        parsedUser = re.search(r"['\"]?(.*?)['\"]?\@", user)

                        if parsedUser:
                            users[users.index(user)] = parsedUser.groups()[0]

            retrievedUsers = set()

            for user in users:
                outuser = user
                if user in retrievedUsers:
                    continue

                if Backend.isDbms(
                        DBMS.MYSQL) and kb.data.has_information_schema:
                    user = "******" % user

                if Backend.isDbms(DBMS.INFORMIX):
                    count = 1
                else:
                    infoMsg = "fetching number of privileges "
                    infoMsg += "for user '%s'" % outuser
                    logger.info(infoMsg)

                    if Backend.isDbms(
                            DBMS.MYSQL) and not kb.data.has_information_schema:
                        query = rootQuery.blind.count2 % user
                    elif Backend.isDbms(
                            DBMS.MYSQL) and kb.data.has_information_schema:
                        query = rootQuery.blind.count % (conditionChar, user)
                    elif Backend.isDbms(DBMS.ORACLE) and query2:
                        query = rootQuery.blind.count2 % user
                    else:
                        query = rootQuery.blind.count % user

                    count = inject.getValue(query,
                                            union=False,
                                            error=False,
                                            expected=EXPECTED.INT,
                                            charsetType=CHARSET_TYPE.DIGITS)

                    if not isNumPosStrValue(count):
                        if not retrievedUsers and Backend.isDbms(
                                DBMS.ORACLE) and not query2:
                            infoMsg = "trying with table 'USER_SYS_PRIVS'"
                            logger.info(infoMsg)

                            return self.getPrivileges(query2=True)

                        warnMsg = "unable to retrieve the number of "
                        warnMsg += "privileges for user '%s'" % outuser
                        logger.warn(warnMsg)
                        continue

                infoMsg = "fetching privileges for user '%s'" % outuser
                logger.info(infoMsg)

                privileges = set()

                plusOne = Backend.getIdentifiedDbms() in (DBMS.ORACLE,
                                                          DBMS.DB2)
                indexRange = getLimitRange(count, plusOne=plusOne)

                for index in indexRange:
                    if Backend.isDbms(
                            DBMS.MYSQL) and not kb.data.has_information_schema:
                        query = rootQuery.blind.query2 % (user, index)
                    elif Backend.isDbms(
                            DBMS.MYSQL) and kb.data.has_information_schema:
                        query = rootQuery.blind.query % (conditionChar, user,
                                                         index)
                    elif Backend.isDbms(DBMS.ORACLE) and query2:
                        query = rootQuery.blind.query2 % (user, index)
                    elif Backend.isDbms(DBMS.FIREBIRD):
                        query = rootQuery.blind.query % (index, user)
                    elif Backend.isDbms(DBMS.INFORMIX):
                        query = rootQuery.blind.query % (user, )
                    else:
                        query = rootQuery.blind.query % (user, index)

                    privilege = unArrayizeValue(
                        inject.getValue(query, union=False, error=False))

                    if privilege is None:
                        continue

                    # In PostgreSQL we get 1 if the privilege is True,
                    # 0 otherwise
                    if Backend.isDbms(DBMS.PGSQL) and ", " in privilege:
                        privilege = privilege.replace(", ", ',')
                        privs = privilege.split(',')
                        i = 1

                        for priv in privs:
                            if priv.isdigit() and int(priv) == 1:
                                for position, pgsqlPriv in PGSQL_PRIVS.items():
                                    if position == i:
                                        privileges.add(pgsqlPriv)

                            i += 1

                    # In MySQL >= 5.0 and Oracle we get the list
                    # of privileges as string
                    elif Backend.isDbms(DBMS.ORACLE) or (Backend.isDbms(
                            DBMS.MYSQL) and kb.data.has_information_schema):
                        privileges.add(privilege)

                    # In MySQL < 5.0 we get Y if the privilege is
                    # True, N otherwise
                    elif Backend.isDbms(
                            DBMS.MYSQL) and not kb.data.has_information_schema:
                        privilege = privilege.replace(", ", ',')
                        privs = privilege.split(',')
                        i = 1

                        for priv in privs:
                            if priv.upper() == 'Y':
                                for position, mysqlPriv in MYSQL_PRIVS.items():
                                    if position == i:
                                        privileges.add(mysqlPriv)

                            i += 1

                    # In Firebird we get one letter for each privilege
                    elif Backend.isDbms(DBMS.FIREBIRD):
                        privileges.add(FIREBIRD_PRIVS[privilege.strip()])

                    # In Informix we get one letter for the highest privilege
                    elif Backend.isDbms(DBMS.INFORMIX):
                        privileges.add(INFORMIX_PRIVS[privilege.strip()])

                    # In DB2 we get Y or G if the privilege is
                    # True, N otherwise
                    elif Backend.isDbms(DBMS.DB2):
                        privs = privilege.split(',')
                        privilege = privs[0]
                        privs = privs[1]
                        privs = list(privs.strip())
                        i = 1

                        for priv in privs:
                            if priv.upper() in ('Y', 'G'):
                                for position, db2Priv in DB2_PRIVS.items():
                                    if position == i:
                                        privilege += ", " + db2Priv

                            i += 1

                        privileges.add(privilege)

                    # In MySQL < 5.0 we break the cycle after the first
                    # time we get the user's privileges otherwise we
                    # duplicate the same query
                    if Backend.isDbms(
                            DBMS.MYSQL) and not kb.data.has_information_schema:
                        break

                if privileges:
                    kb.data.cachedUsersPrivileges[user] = list(privileges)
                else:
                    warnMsg = "unable to retrieve the privileges "
                    warnMsg += "for user '%s'" % outuser
                    logger.warn(warnMsg)

                retrievedUsers.add(user)

        if not kb.data.cachedUsersPrivileges:
            errMsg = "unable to retrieve the privileges "
            errMsg += "for the database users"
            raise SqlmapNoneDataException(errMsg)

        for user, privileges in kb.data.cachedUsersPrivileges.items():
            if isAdminFromPrivileges(privileges):
                areAdmins.add(user)

        return (kb.data.cachedUsersPrivileges, areAdmins)
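
The per-DBMS branches above all reduce to the same idea: split the raw value into positional flags and translate each set flag through a position-keyed dict such as MYSQL_PRIVS or PGSQL_PRIVS. A minimal standalone sketch of that pattern (the PRIVS mapping below is hypothetical, not sqlmap's actual table):

# Sketch only: map positional Y/N flags to privilege names, as the
# MySQL < 5.0 branch above does via MYSQL_PRIVS.
PRIVS = {1: "SELECT", 2: "INSERT", 3: "UPDATE"}  # hypothetical positions

def parse_flag_list(raw):
    privileges = set()
    for position, flag in enumerate(raw.replace(", ", ",").split(","), start=1):
        if flag.upper() == "Y" and position in PRIVS:
            privileges.add(PRIVS[position])
    return privileges

# parse_flag_list("Y, N, Y") -> {"SELECT", "UPDATE"}
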
Beispiel #35
0
    def getPasswordHashes(self):
        infoMsg = "fetching database users password hashes"

        rootQuery = queries[Backend.getIdentifiedDbms()].passwords

        if conf.user == CURRENT_USER:
            infoMsg += " for current user"
            conf.user = self.getCurrentUser()

        logger.info(infoMsg)

        if conf.user and Backend.getIdentifiedDbms() in (DBMS.ORACLE,
                                                         DBMS.DB2):
            conf.user = conf.user.upper()

        if conf.user:
            users = conf.user.split(',')

            if Backend.isDbms(DBMS.MYSQL):
                for user in users:
                    parsedUser = re.search(r"['\"]?(.*?)['\"]?\@", user)

                    if parsedUser:
                        users[users.index(user)] = parsedUser.groups()[0]
        else:
            users = []

        users = [_ for _ in users if _]

        if any(
                isTechniqueAvailable(_)
                for _ in (PAYLOAD.TECHNIQUE.UNION, PAYLOAD.TECHNIQUE.ERROR,
                          PAYLOAD.TECHNIQUE.QUERY)) or conf.direct:
            if Backend.isDbms(DBMS.MSSQL) and Backend.isVersionWithin(
                ("2005", "2008")):
                query = rootQuery.inband.query2
            else:
                query = rootQuery.inband.query

            condition = rootQuery.inband.condition

            if conf.user:
                query += " WHERE "
                query += " OR ".join("%s = '%s'" % (condition, user)
                                     for user in sorted(users))

            if Backend.isDbms(DBMS.SYBASE):
                getCurrentThreadData().disableStdOut = True

                retVal = pivotDumpTable(
                    "(%s) AS %s" % (query, kb.aliasName),
                    ['%s.name' % kb.aliasName,
                     '%s.password' % kb.aliasName],
                    blind=False)

                if retVal:
                    for user, password in filterPairValues(
                            _zip(retVal[0]["%s.name" % kb.aliasName],
                                 retVal[0]["%s.password" % kb.aliasName])):
                        if user not in kb.data.cachedUsersPasswords:
                            kb.data.cachedUsersPasswords[user] = [password]
                        else:
                            kb.data.cachedUsersPasswords[user].append(password)

                getCurrentThreadData().disableStdOut = False
            else:
                values = inject.getValue(query, blind=False, time=False)

                if Backend.isDbms(DBMS.MSSQL) and isNoneValue(values):
                    values = inject.getValue(query.replace(
                        "master.dbo.fn_varbintohexstr",
                        "sys.fn_sqlvarbasetostr"),
                                             blind=False,
                                             time=False)
                elif Backend.isDbms(
                        DBMS.MYSQL) and (isNoneValue(values) or all(
                            len(value) == 2 and
                            (isNullValue(value[1]) or isNoneValue(value[1]))
                            for value in values)):
                    values = inject.getValue(query.replace(
                        "authentication_string", "password"),
                                             blind=False,
                                             time=False)

                for user, password in filterPairValues(values):
                    if not user or user == " ":
                        continue

                    password = parsePasswordHash(password)

                    if user not in kb.data.cachedUsersPasswords:
                        kb.data.cachedUsersPasswords[user] = [password]
                    else:
                        kb.data.cachedUsersPasswords[user].append(password)

        if not kb.data.cachedUsersPasswords and isInferenceAvailable(
        ) and not conf.direct:
            fallback = False

            if not len(users):
                users = self.getUsers()

                if Backend.isDbms(DBMS.MYSQL):
                    for user in users:
                        parsedUser = re.search(r"['\"]?(.*?)['\"]?\@", user)

                        if parsedUser:
                            users[users.index(user)] = parsedUser.groups()[0]

            if Backend.isDbms(DBMS.SYBASE):
                getCurrentThreadData().disableStdOut = True

                query = rootQuery.inband.query

                retVal = pivotDumpTable(
                    "(%s) AS %s" % (query, kb.aliasName),
                    ['%s.name' % kb.aliasName,
                     '%s.password' % kb.aliasName],
                    blind=True)

                if retVal:
                    for user, password in filterPairValues(
                            _zip(retVal[0]["%s.name" % kb.aliasName],
                                 retVal[0]["%s.password" % kb.aliasName])):
                        password = "******" % encodeHex(password,
                                                      binary=False).upper()

                        if user not in kb.data.cachedUsersPasswords:
                            kb.data.cachedUsersPasswords[user] = [password]
                        else:
                            kb.data.cachedUsersPasswords[user].append(password)

                getCurrentThreadData().disableStdOut = False
            else:
                retrievedUsers = set()

                for user in users:
                    user = unArrayizeValue(user)

                    if user in retrievedUsers:
                        continue

                    if Backend.isDbms(DBMS.INFORMIX):
                        count = 1
                    else:
                        infoMsg = "fetching number of password hashes "
                        infoMsg += "for user '%s'" % user
                        logger.info(infoMsg)

                        if Backend.isDbms(
                                DBMS.MSSQL) and Backend.isVersionWithin(
                                    ("2005", "2008")):
                            query = rootQuery.blind.count2 % user
                        else:
                            query = rootQuery.blind.count % user

                        count = inject.getValue(
                            query,
                            union=False,
                            error=False,
                            expected=EXPECTED.INT,
                            charsetType=CHARSET_TYPE.DIGITS)

                        if not isNumPosStrValue(count):
                            if Backend.isDbms(DBMS.MSSQL):
                                fallback = True
                                count = inject.getValue(
                                    query.replace(
                                        "master.dbo.fn_varbintohexstr",
                                        "sys.fn_sqlvarbasetostr"),
                                    union=False,
                                    error=False,
                                    expected=EXPECTED.INT,
                                    charsetType=CHARSET_TYPE.DIGITS)
                            elif Backend.isDbms(DBMS.MYSQL):
                                fallback = True
                                count = inject.getValue(
                                    query.replace("authentication_string",
                                                  "password"),
                                    union=False,
                                    error=False,
                                    expected=EXPECTED.INT,
                                    charsetType=CHARSET_TYPE.DIGITS)

                        if not isNumPosStrValue(count):
                            warnMsg = "unable to retrieve the number of password "
                            warnMsg += "hashes for user '%s'" % user
                            logger.warn(warnMsg)
                            continue

                    infoMsg = "fetching password hashes for user '%s'" % user
                    logger.info(infoMsg)

                    passwords = []

                    plusOne = Backend.getIdentifiedDbms() in (DBMS.ORACLE,
                                                              DBMS.DB2)
                    indexRange = getLimitRange(count, plusOne=plusOne)

                    for index in indexRange:
                        if Backend.isDbms(DBMS.MSSQL):
                            if Backend.isVersionWithin(("2005", "2008")):
                                query = rootQuery.blind.query2 % (user, index,
                                                                  user)
                            else:
                                query = rootQuery.blind.query % (user, index,
                                                                 user)

                            if fallback:
                                query = query.replace(
                                    "master.dbo.fn_varbintohexstr",
                                    "sys.fn_sqlvarbasetostr")

                        elif Backend.isDbms(DBMS.INFORMIX):
                            query = rootQuery.blind.query % (user, )

                        elif Backend.isDbms(DBMS.HSQLDB):
                            query = rootQuery.blind.query % (index, user)

                        else:
                            query = rootQuery.blind.query % (user, index)

                        if Backend.isDbms(DBMS.MYSQL):
                            if fallback:
                                query = query.replace("authentication_string",
                                                      "password")

                        password = unArrayizeValue(
                            inject.getValue(query, union=False, error=False))
                        password = parsePasswordHash(password)

                        passwords.append(password)

                    if passwords:
                        kb.data.cachedUsersPasswords[user] = passwords
                    else:
                        warnMsg = "unable to retrieve the password "
                        warnMsg += "hashes for user '%s'" % user
                        logger.warn(warnMsg)

                    retrievedUsers.add(user)

        if not kb.data.cachedUsersPasswords:
            errMsg = "unable to retrieve the password hashes for the "
            errMsg += "database users (probably because the DBMS "
            errMsg += "current user has no read privileges over the relevant "
            errMsg += "system database table(s))"
            logger.error(errMsg)
        else:
            for user in kb.data.cachedUsersPasswords:
                kb.data.cachedUsersPasswords[user] = list(
                    set(kb.data.cachedUsersPasswords[user]))

            storeHashesToFile(kb.data.cachedUsersPasswords)

            message = "do you want to perform a dictionary-based attack "
            message += "against retrieved password hashes? [Y/n/q]"
            choice = readInput(message, default='Y').upper()

            if choice == 'N':
                pass
            elif choice == 'Q':
                raise SqlmapUserQuitException
            else:
                attackCachedUsersPasswords()

        return kb.data.cachedUsersPasswords
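
Both the in-band and the blind branch above end in the same bookkeeping: a dict mapping each user to a list of hashes, deduplicated with set() before storeHashesToFile() and the optional dictionary attack. A minimal sketch of that accumulation step, using defaultdict instead of the explicit membership checks (collect_hashes is an assumed helper name):

# Sketch only: per-user hash lists with duplicates removed, mirroring the
# kb.data.cachedUsersPasswords handling above.
from collections import defaultdict

def collect_hashes(pairs):
    cached = defaultdict(list)
    for user, password in pairs:
        if not user or user == " ":
            continue
        cached[user].append(password)
    return {user: list(set(hashes)) for user, hashes in cached.items()}

# collect_hashes([("root", "h1"), ("root", "h1"), ("app", "h2")])
# -> {"root": ["h1"], "app": ["h2"]}
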
Beispiel #36
0
    def __init__(self, additional_expressions=None):
        super(ExpressionsDict,
              self).__init__(additional_expressions=additional_expressions)

        self.expressions_dict["integral"] = "0.5"

        # blinding (of data)
        for channel in ["tt", "mt", "et", "em", "mm", "ee"]:
            pass  # self.expressions_dict["blind_"+channel+"_svfitMass"] = "((svfitMass<100.0)+(svfitMass>200.0))"

        # category cuts
        self.expressions_dict["cat_inclusive"] = "1.0"
        self.expressions_dict["cat_0jet"] = "njetspt20 < 1"
        self.expressions_dict["cat_1jet"] = "(njetspt20 > 0)*(njetspt20 < 2)"
        self.expressions_dict["cat_2jet"] = "njetspt20 > 1"

        # Z->tautau categories
        for channel in ["tt", "mt", "et", "em", "mm", "ee"]:
            self.expressions_dict["catZtt13TeV_" + channel +
                                  "_inclusive"] = "1.0"
            self.expressions_dict["catZtt13TeV_" + channel +
                                  "_2jet_inclusive"] = "(njetspt30>1)"
            self.expressions_dict[
                "catZtt13TeV_" + channel +
                "_1jet_inclusive"] = "(njetspt30>0)*(njetspt30<2)"
            self.expressions_dict["catZtt13TeV_" + channel +
                                  "_0jet_inclusive"] = "(njetspt30<1)"

        # Z->tautau polarisation categories
        for channel in ["em"]:
            self.expressions_dict["catZttPol13TeV_" + channel +
                                  "_x_oneprong"] = "(1.0)"

        for channel in ["mt", "et"]:
            self.expressions_dict["catZttPol13TeV_" + channel +
                                  "_x_a1"] = "(decayMode_2 == 10)"
            self.expressions_dict["catZttPol13TeV_" + channel +
                                  "_x_rho"] = "(decayMode_2 == 1)"
            self.expressions_dict["catZttPol13TeV_" + channel +
                                  "_x_oneprong"] = "(decayMode_2 == 0)"

        for channel in ["tt"]:
            self.expressions_dict["catZttPol13TeV_" + channel +
                                  "_a1_x"] = "((decayMode_1 == 10))"
            self.expressions_dict[
                "catZttPol13TeV_" + channel +
                "_rho_x"] = "((decayMode_1 == 1) * (decayMode_2 != 10))"
            self.expressions_dict[
                "catZttPol13TeV_" + channel +
                "_oneprong_x"] = "((decayMode_1 == 0) * (decayMode_2 != 10) * (decayMode_2 != 1))"

            self.expressions_dict[
                "catZttPol13TeV_" + channel +
                "_x_a1"] = "((decayMode_2 == 10) * (decayMode_1 != 10))"
            self.expressions_dict[
                "catZttPol13TeV_" + channel +
                "_x_rho"] = "((decayMode_2 == 1) * (decayMode_1 != 10) * (decayMode_1 != 1))"

        # Z->tautau polarisation test statistics
        for channel in ["em"]:
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_x_oneprong"] = "visibleOverFullEnergy_2"

        for channel in ["mt", "et"]:
            self.expressions_dict[
                "testZttPol13TeV_" + channel +
                "_x_a1"] = "visibleOverFullEnergy_2"  # TODO change to dedicated a1 variable
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_x_rho"] = "rhoNeutralChargedAsymmetry_2"
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_x_oneprong"] = "visibleOverFullEnergy_2"

        for channel in ["tt"]:
            self.expressions_dict[
                "testZttPol13TeV_" + channel +
                "_a1_x"] = "visibleOverFullEnergy_1"  # TODO change to dedicated a1 variable
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_rho_x"] = "rhoNeutralChargedAsymmetry_1"
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_oneprong_x"] = "visibleOverFullEnergy_1"

            self.expressions_dict[
                "testZttPol13TeV_" + channel +
                "_x_a1"] = "visibleOverFullEnergy_2"  # TODO change to dedicated a1 variable
            self.expressions_dict["testZttPol13TeV_" + channel +
                                  "_x_rho"] = "rhoNeutralChargedAsymmetry_2"

        # H->tautau categories
        for channel in ["tt", "mt", "et", "em", "mm", "ee"]:
            pt_var = "pt_2" if channel in ["mt", "et", "em"] else "pt_1"
            pt_cut = "35.0" if channel in ["mt", "et", "tt"] else "35.0"
            #CP-studies
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_CP_mt"] = "(genPhiStarCP>-10) * (TauMProngEnergy >= 0.44 && TauPProngEnergy >= 0.55)"
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_CP_et"] = "(genPhiStarCP>-10) * (TauMProngEnergy >= 0.44 && TauPProngEnergy >= 0.55)"
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_CP_em"] = "(genPhiStarCP>-10) * (TauMProngEnergy >= 0.44 && TauPProngEnergy >= 0.44)"
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_CP_tt"] = "(genPhiStarCP>-10) * (TauMProngEnergy >= 0.55 && TauPProngEnergy >= 0.55)"

            # Standard Model
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_inclusive"] = "(1.0)"
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_inclusivemt40"] = "(1.0)"
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_2jet_inclusive"] = "(njetspt30>1)"

            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_2jet_vbf"] = self.expressions_dict[
                    "catHtt13TeV_" + channel +
                    "_2jet_inclusive"] + "*(mjj>200.0)*(jdeta>2.0)"

            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_1jet_inclusive"] = ("(! ({vbf}))".format(
                    vbf=self.expressions_dict["catHtt13TeV_" + channel +
                                              "_2jet_vbf"])) + "*(njetspt30>0)"
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_1jet_high"] = self.expressions_dict[
                    "catHtt13TeV_" + channel +
                    "_1jet_inclusive"] + ("*({pt_var}>{pt_cut})".format(
                        pt_var=pt_var, pt_cut=pt_cut))
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_1jet_low"] = self.expressions_dict[
                    "catHtt13TeV_" + channel +
                    "_1jet_inclusive"] + ("*({pt_var}<={pt_cut})".format(
                        pt_var=pt_var, pt_cut=pt_cut))
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_0jet_inclusive"] = ("(! ({vbf}))*(! ({onejet}))".format(
                    vbf=self.expressions_dict["catHtt13TeV_" + channel +
                                              "_2jet_vbf"],
                    onejet=self.expressions_dict["catHtt13TeV_" + channel +
                                                 "_1jet_inclusive"]))
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_0jet_high"] = self.expressions_dict[
                    "catHtt13TeV_" + channel +
                    "_0jet_inclusive"] + ("*({pt_var}>{pt_cut})".format(
                        pt_var=pt_var, pt_cut=pt_cut))
            self.expressions_dict[
                "catHtt13TeV_" + channel +
                "_0jet_low"] = self.expressions_dict[
                    "catHtt13TeV_" + channel +
                    "_0jet_inclusive"] + ("*({pt_var}<={pt_cut})".format(
                        pt_var=pt_var, pt_cut=pt_cut))

            # Standard Model experimental
            boosted_higgs_string = "(H_pt>100)"
            boosted_higgs_medium_string = "(H_pt>50)"
            boosted_higgs_low_string = "(H_pt>30)"
            vbf_medium_string = "(mjj>500&&jdeta>3.5)"
            vbf_loose_string = "(mjj>200&&jdeta>2)"
            jet2_string = "(njetspt30>1)"
            jet1_string = "(njetspt30>0)"
            pt2_tight_string = "(pt_2>=45)"
            pt2_medium_string = "(pt_2>=35)"
            pt2_loose_string = "(pt_2>=25)"
            eta_hard_string = "jdeta>4.0"
            # used in CERN signal extraction study
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_vbf"] = self.combine(
                                      [vbf_medium_string, jet2_string])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_boosted"] = self.combine([
                                      jet1_string,
                                      self.invert(vbf_medium_string),
                                      boosted_higgs_string, pt2_tight_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_highpt2"] = self.combine([
                                      jet1_string,
                                      self.invert(vbf_medium_string),
                                      self.invert(boosted_higgs_string),
                                      pt2_tight_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_lowpt2"] = self.combine([
                                      jet1_string,
                                      self.invert(vbf_medium_string),
                                      self.invert(pt2_tight_string)
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_0jet_highpt2"] = self.combine([
                                      self.invert(jet1_string),
                                      pt2_tight_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_0jet_lowpt2"] = self.combine([
                                      self.invert(jet1_string),
                                      self.invert(pt2_tight_string)
                                  ])
            # motivated by s/sqrt(b) efficiency
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_vbf_tag"] = self.combine([
                                      jet2_string, boosted_higgs_medium_string,
                                      eta_hard_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_2jet_untagged"] = self.combine([
                                      jet2_string,
                                      self.invert(
                                          self.combine([
                                              boosted_higgs_medium_string,
                                              eta_hard_string
                                          ]))
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_boost_high"] = self.combine(
                                      [jet1_string, boosted_higgs_string])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_boost_medium"] = self.combine([
                                      jet1_string,
                                      self.invert(boosted_higgs_string),
                                      boosted_higgs_low_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_1jet_boost_low"] = self.combine([
                                      jet1_string,
                                      self.invert(boosted_higgs_low_string)
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_0jet_nhighpt2"] = self.combine([
                                      self.invert(jet1_string),
                                      pt2_tight_string
                                  ])
            self.expressions_dict["catHtt13TeV_" + channel +
                                  "_0jet_nlowpt2"] = self.combine([
                                      self.invert(jet1_string),
                                      self.invert(pt2_tight_string)
                                  ])

        # MSSSM
        for channel in ["et", "mt", "tt", "em"]:
            self.expressions_dict["catHttMSSM13TeV_" + channel +
                                  "_inclusive"] = "(1.0)"
            self.expressions_dict["catHttMSSM13TeV_" + channel +
                                  "_inclusivemt40"] = "(1.0)"
            self.expressions_dict["catHttMSSM13TeV_" + channel +
                                  "_nobtag"] = "(nbtag==0)"
            self.expressions_dict["catHttMSSM13TeV_" + channel +
                                  "_btag"] = "(njets<=1)*(nbtag>=1)"
        for channel in ["et", "mt", "tt"]:
            pt_var = "pt_2" if channel in ["mt", "et"] else "pt_1"
            pt_cut_nobtag_high = "60.0" if channel in ["mt", "et"] else "80.0"
            pt_cut_nobtag_medium = "45.0" if channel in ["mt", "et"
                                                         ] else "60.0"
            pt_cut_nobtag_low = "30.0" if channel in ["mt", "et"] else "45.0"
            pt_cut_btag_high = "45.0" if channel in ["mt", "et"] else "60.0"
            pt_cut_btag_low = "30.0" if channel in ["mt", "et"] else "45.0"
            self.expressions_dict[
                "catHttMSSM13TeV_" + channel +
                "_nobtag_high"] = self.expressions_dict[
                    "catHttMSSM13TeV_" + channel +
                    "_nobtag"] + "*({pt_var}>{pt_cut})".format(
                        pt_var=pt_var, pt_cut=pt_cut_nobtag_high)
            self.expressions_dict[
                "catHttMSSM13TeV_" + channel +
                "_nobtag_medium"] = self.expressions_dict[
                    "catHttMSSM13TeV_" + channel +
                    "_nobtag"] + "*({pt_var}<={pt_cut_1})*({pt_var}>{pt_cut_2})".format(
                        pt_var=pt_var,
                        pt_cut_1=pt_cut_nobtag_high,
                        pt_cut_2=pt_cut_nobtag_medium)
            self.expressions_dict[
                "catHttMSSM13TeV_" + channel +
                "_nobtag_low"] = self.expressions_dict[
                    "catHttMSSM13TeV_" + channel +
                    "_nobtag"] + "*({pt_var}<={pt_cut_1})*({pt_var}>{pt_cut_2})".format(
                        pt_var=pt_var,
                        pt_cut_1=pt_cut_nobtag_medium,
                        pt_cut_2=pt_cut_nobtag_low)
            self.expressions_dict["catHttMSSM13TeV_" + channel +
                                  "_btag_high"] = self.expressions_dict[
                                      "catHttMSSM13TeV_" + channel +
                                      "_btag"] + "*({pt_var}>{pt_cut})".format(
                                          pt_var=pt_var,
                                          pt_cut=pt_cut_btag_high)
            self.expressions_dict[
                "catHttMSSM13TeV_" + channel +
                "_btag_low"] = self.expressions_dict[
                    "catHttMSSM13TeV_" + channel +
                    "_btag"] + "*({pt_var}<={pt_cut_1})*({pt_var}>{pt_cut_2})".format(
                        pt_var=pt_var,
                        pt_cut_1=pt_cut_btag_high,
                        pt_cut_2=pt_cut_btag_low)

        # MVA Htt categories
        #self.expressions_dict["mt_vbf_pre"] = "((0.3<=ttj_1)*(0.45<=ztt_1))"
        #self.expressions_dict["mt_vbf_sig"] = "{pre}*(0.8<=vbf_1)".format(pre=self.expressions_dict["mt_vbf_pre"])
        #self.expressions_dict["mt_vbf_like"] = "{pre}*(-0.5<=vbf_1&&vbf_1<0.8)".format(pre=self.expressions_dict["mt_vbf_pre"])
        #self.expressions_dict["mt_vbf_bkg"] = "{pre}*(vbf_1<-0.5)".format(pre=self.expressions_dict["mt_vbf_pre"])
        #self.expressions_dict["mt_vbf_rest"] = "!{pre}".format(pre=self.expressions_dict["mt_vbf_pre"])
        #self.expressions_dict["mt_2jets_all"] = "(njetspt30>1)"
        #self.expressions_dict["mt_1jets_all"] = "(njetspt30==1)"
        #self.expressions_dict["mt_0jets_all"] = "(njetspt30==0)"
        #self.expressions_dict["mt_2jets_vbfbdt"] = "(0.8<=vbf_1)"
        #self.expressions_dict["mt_2jet_vbf_bdt"] = "({pre}*(0.8<=vbf_1))".format(pre=self.expressions_dict["mt_vbf_pre"])
        #self.expressions_dict["mt_1jet_inclusive_bdt"] = ("((! {vbf})".format(
        #vbf=self.expressions_dict["mt_2jet_vbf_bdt"]
        #))+"*(njetspt30>0))"
        #self.expressions_dict["mt_1jet_sig"] = self.expressions_dict["mt_1jet_inclusive_bdt"]+"*((0.4<=ttj_1)*(0.4<=ztt_1))"
        #self.expressions_dict["mt_1jet_bkg"] = self.expressions_dict["mt_1jet_inclusive_bdt"]+"*(!((0.4<=ttj_1)*(0.4<=ztt_1)))"
        #self.expressions_dict["mt_0jet_inclusive_bdt"] = ("(!{vbf})*(!{onejet})".format(
        #vbf=self.expressions_dict["mt_2jet_vbf_bdt"],
        #onejet=self.expressions_dict["mt_1jet_inclusive_bdt"]
        #))
        #self.expressions_dict["mt_0jet_sig"] = self.expressions_dict["mt_0jet_inclusive_bdt"]+"*((-0.6<=ttj_1)*(0.2<=ztt_1))"
        #self.expressions_dict["mt_0jet_bkg"] = self.expressions_dict["mt_0jet_inclusive_bdt"]+"*(!((-0.6<=ttj_1)*(0.2<=ztt_1)))"

        #for channel in ["tt", "mt", "et", "em"]:
        #for classic in ["0jet_high", "0jet_low", "1jet_high", "1jet_low", "2jet_vbf"]:
        #self.expressions_dict["{channel}_{classic}".format(channel=channel, classic=classic)] = self.expressions_dict["catHtt13TeV_{channel}_{classic}".format(channel=channel, classic=classic)]
        ##========================================Copy here!========================================
        #expressions_path = os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/mva_configs/%s_expressions.cfg"%channel)
        #if not os.path.exists(expressions_path):
        #continue
        #self.expressions_dict["%s_inclusive"%(channel)] = "(1.0)"
        #with open(expressions_path, "r") as exps:
        #regular_name = ""
        #for line in exps:
        ##log.info(line)
        #vbf, name, values = map(strip, line.split(" : "))
        #values = map(float, values.split(" "))
        #values.pop(0)
        #values.pop(-1)
        #if vbf == "regular_name":
        #self.expressions_dict["%s_%s_signal"%(channel,name)] = "(%f <= %s)"%(values[1], name)
        #self.expressions_dict["%s_%s_mixed"%(channel,name)] = "(%f <= %s && %s < %f)"%(values[0], name, name, values[1])
        #self.expressions_dict["%s_%s_bkg"%(channel,name)] = "(%s < %f)"%(name, values[0])
        #regular_name= name
        #continue
        #elif vbf == "vbf_tagger":
        #if regular_name == "":
        #log.fatal("Please check if cuts in file %s are in correct order"%expressions_path)
        #sys.exit()
        #self.expressions_dict["{channel}_{vbf_tagger}_{mva_name}_tagged_signal".format(
        #channel=channel, vbf_tagger=name, mva_name=regular_name)]=self.expressions_dict["{channel}_{reg_name}_signal".format(channel=channel, reg_name=regular_name)]+"*({upper} <= {vbf_tagger})".format(upper=values[0], vbf_tagger=name)
        #self.expressions_dict["{channel}_{vbf_tagger}_{mva_name}_not_tagged_signal".format(
        #channel=channel, vbf_tagger=name, mva_name=regular_name)]=self.expressions_dict["{channel}_{reg_name}_signal".format(channel=channel, reg_name=regular_name)]+"*({lower} > {vbf_tagger})".format(lower=values[0], vbf_tagger=name)
        #expressions_path = os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/mva_configs/%s_shift_expressions.cfg"%channel)
        #if not os.path.exists(expressions_path):
        #continue
        #shifts_dict = jsonTools.JsonDict(expressions_path)
        #self.expressions_dict.update(shifts_dict)
        #========================================Copy here!=======================================
        self.expressions_dict["cat_OneProng"] = "(decayMode_2 == 0)"
        self.expressions_dict["catOneProng"] = self.expressions_dict[
            "cat_OneProng"]
        for channel in ["mt", "et"]:
            self.expressions_dict[
                "catOneProng_" +
                channel] = self.expressions_dict["catOneProng"]

        self.expressions_dict[
            "cat_OneProngPiZeros"] = "(decayMode_2 >= 1)*(decayMode_2 <= 2)"
        self.expressions_dict["catOneProngPiZeros"] = self.expressions_dict[
            "cat_OneProngPiZeros"]
        for channel in ["mt", "et"]:
            self.expressions_dict[
                "catOneProngPiZeros_" +
                channel] = self.expressions_dict["catOneProngPiZeros"]

        self.expressions_dict["cat_ThreeProng"] = "(decayMode_2 == 10)"
        self.expressions_dict["catThreeProng"] = self.expressions_dict[
            "cat_ThreeProng"]
        for channel in ["mt", "et"]:
            self.expressions_dict[
                "catThreeProng_" +
                channel] = self.expressions_dict["catThreeProng"]

        self.expressions_dict[
            "cat_AllDMs"] = "(decayMode_2 >= 0)*(decayMode_2 <= 10)"
        self.expressions_dict["catAllDMs"] = self.expressions_dict[
            "cat_AllDMs"]
        for channel in ["mt", "et"]:
            self.expressions_dict["catAllDMs_" +
                                  channel] = self.expressions_dict["catAllDMs"]

        self.expressions_dict[
            "cat_AllDMsNotOneProng"] = "(decayMode_2 >= 1)*(decayMode_2 <= 10)"
        self.expressions_dict["catAllDMsNotOneProng"] = self.expressions_dict[
            "cat_AllDMsNotOneProng"]
        for channel in ["mt", "et"]:
            self.expressions_dict[
                "catAllDMsNotOneProng_" +
                channel] = self.expressions_dict["catAllDMsNotOneProng"]

        #==========================CategoriesDictUpdates=========================================================
        import Artus.Utility.jsonTools as jsonTools
        import HiggsAnalysis.KITHiggsToTauTau.plotting.configs.categories as Categories
        categoriesUpdate = Categories.CategoriesDict().getExpressionsDict()
        self.expressions_dict.update(categoriesUpdate)

        replacements = {
            "0jet": "zerojet",
            "1jet": "onejet",
            "2jet": "twojet",
        }
        for short_expression, long_expression in self.expressions_dict.items():
            if any([
                    replacement in short_expression
                    for replacement in replacements.keys()
            ]):
                new_short_expression = short_expression
                for replacement in replacements.iteritems():
                    new_short_expression = new_short_expression.replace(
                        *replacement)
                self.expressions_dict[new_short_expression] = long_expression
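
The final loop above (written for Python 2, hence iteritems()) uses any() to find every key containing 0jet/1jet/2jet and adds a spelled-out alias for it. A Python 3 sketch of the same idea, collecting the aliases first so the dict is not mutated while it is being iterated:

# Sketch only: derive aliased keys such as "cat_0jet" -> "cat_zerojet".
replacements = {"0jet": "zerojet", "1jet": "onejet", "2jet": "twojet"}
expressions = {"cat_0jet": "njetspt20 < 1", "cat_inclusive": "1.0"}

aliases = {}
for key, value in expressions.items():
    if any(old in key for old in replacements):
        new_key = key
        for old, new in replacements.items():
            new_key = new_key.replace(old, new)
        aliases[new_key] = value
expressions.update(aliases)
# expressions now also contains "cat_zerojet": "njetspt20 < 1"
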
Beispiel #37
0
 def has_stopped_suites(self):
     """Return True if we have any stopped suite information."""
     return any(
         KEY_PORT not in result for result in self.suite_info_map.values())
Beispiel #38
0
def farthest_points(*args):
    """Return the subset of points from a set of points that were
    the furthest apart from each other in the 2D plane.

    Parameters
    ==========

    args : a collection of Points on 2D plane.

    Notes
    =====

    This can only be performed on a set of points whose coordinates can
    be ordered on the number line. If there are no ties then a single
    pair of Points will be in the set.

    References
    ==========

    [1] http://code.activestate.com/recipes/117225-convex-hull-and-diameter-of-2d-point-sets/

    [2] Rotating Callipers Technique
    https://en.wikipedia.org/wiki/Rotating_calipers

    Examples
    ========

    >>> from sympy.geometry import farthest_points, Point2D, Triangle
    >>> Triangle(sss=(3, 4, 5)).args
    (Point2D(0, 0), Point2D(3, 0), Point2D(3, 4))
    >>> farthest_points(*_)
    {(Point2D(0, 0), Point2D(3, 4))}

    """
    from math import hypot, sqrt as _sqrt
    from sympy.core.power import sqrt

    def rotatingCalipers(Points):
        U, L = convex_hull(*Points, **dict(polygon=False))

        if L is None:
            if isinstance(U, Point):
                raise ValueError('At least two distinct points must be given.')
            yield U.args
        else:
            i = 0
            j = len(L) - 1
            while i < len(U) - 1 or j > 0:
                yield U[i], L[j]
                # if all the way through one side of hull, advance the other side
                if i == len(U) - 1:
                    j -= 1
                elif j == 0:
                    i += 1
                # still points left on both lists, compare slopes of next hull edges
                # being careful to avoid divide-by-zero in slope calculation
                elif (U[i+1].y - U[i].y) * (L[j].x - L[j-1].x) > \
                        (L[j].y - L[j-1].y) * (U[i+1].x - U[i].x):
                    i += 1
                else:
                    j -= 1

    p = [Point2D(i) for i in set(args)]

    if any(not i.is_Rational for j in p for i in j.args):
        def hypot(x, y):
            arg = x*x + y*y
            if arg.is_Rational:
                return _sqrt(arg)
            return sqrt(arg)

    rv = []
    diam = 0
    for pair in rotatingCalipers(args):
        h, q = _ordered_points(pair)
        d = hypot(h.x - q.x, h.y - q.y)
        if d > diam:
            rv = [(h, q)]
        elif d == diam:
            rv.append((h, q))
        else:
            continue
        diam = d

    return set(rv)
Beispiel #39
0
 def understand(image_file):
     try:
         params = FormatXTC.params_from_phil(rayonix_locator_scope, image_file)
     except Exception:
         return False
     return any(["rayonix" in src.lower() for src in params.detector_address])
Beispiel #40
0
def closest_points(*args):
    """Return the subset of points from a set of points that were
    the closest to each other in the 2D plane.

    Parameters
    ==========

    args : a collection of Points on 2D plane.

    Notes
    =====

    This can only be performed on a set of points whose coordinates can
    be ordered on the number line. If there are no ties then a single
    pair of Points will be in the set.

    References
    ==========

    [1] http://www.cs.mcgill.ca/~cs251/ClosestPair/ClosestPairPS.html

    [2] Sweep line algorithm
    https://en.wikipedia.org/wiki/Sweep_line_algorithm

    Examples
    ========

    >>> from sympy.geometry import closest_points, Point2D, Triangle
    >>> Triangle(sss=(3, 4, 5)).args
    (Point2D(0, 0), Point2D(3, 0), Point2D(3, 4))
    >>> closest_points(*_)
    {(Point2D(0, 0), Point2D(3, 0))}

    """
    from collections import deque
    from math import hypot, sqrt as _sqrt
    from sympy.core.power import sqrt

    p = [Point2D(i) for i in set(args)]
    if len(p) < 2:
        raise ValueError('At least 2 distinct points must be given.')

    try:
        p.sort(key=lambda x: x.args)
    except TypeError:
        raise ValueError("The points could not be sorted.")

    if any(not i.is_Rational for j in p for i in j.args):
        def hypot(x, y):
            arg = x*x + y*y
            if arg.is_Rational:
                return _sqrt(arg)
            return sqrt(arg)

    rv = [(0, 1)]
    best_dist = hypot(p[1].x - p[0].x, p[1].y - p[0].y)
    i = 2
    left = 0
    box = deque([0, 1])
    while i < len(p):
        while left < i and p[i][0] - p[left][0] > best_dist:
            box.popleft()
            left += 1

        for j in box:
            d = hypot(p[i].x - p[j].x, p[i].y - p[j].y)
            if d < best_dist:
                rv = [(j, i)]
            elif d == best_dist:
                rv.append((j, i))
            else:
                continue
            best_dist = d
        box.append(i)
        i += 1

    return {tuple([p[i] for i in pair]) for pair in rv}
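
In both closest_points and farthest_points, the any(...) guard over the flattened coordinates decides whether plain math.hypot is safe or whether distances have to stay symbolic. A small illustration of that check:

# Sketch only: the guard fires as soon as one coordinate is irrational,
# e.g. sqrt(2), so exact (symbolic) square roots are used for distances.
from sympy import Point2D, sqrt

pts = [Point2D(0, 0), Point2D(1, sqrt(2))]
needs_exact = any(not c.is_Rational for p in pts for c in p.args)
# needs_exact is True here; with all-rational coordinates it would be False
# and the fast floating-point math.hypot path would be used instead.
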
Beispiel #41
0
    def getSources(self, title, year, imdb, tvdb, season, episode, tvshowtitle, premiered, timeout=30):

        progressDialog = control.progressDialog if control.setting('progress.dialog') == '0' else control.progressDialogBG
        progressDialog.create(control.addonInfo('name'), '')
        progressDialog.update(0)

        self.prepareSources()

        sourceDict = self.sourceDict

        content = 'movie' if tvshowtitle == None else 'episode'
        if content == 'movie':
            sourceDict = [(i[0], i[1], getattr(i[1], 'movie', None)) for i in sourceDict]
        else:
            sourceDict = [(i[0], i[1], getattr(i[1], 'tvshow', None)) for i in sourceDict]
        sourceDict = [(i[0], i[1]) for i in sourceDict if not i[2] == None]

        language = self.getLanguage()
        sourceDict = [(i[0], i[1], i[1].language) for i in sourceDict]
        sourceDict = [(i[0], i[1]) for i in sourceDict if any(x in i[2] for x in language)]

        try: sourceDict = [(i[0], i[1], control.setting('provider.' + i[0])) for i in sourceDict]
        except: sourceDict = [(i[0], i[1], 'true') for i in sourceDict]
        sourceDict = [(i[0], i[1]) for i in sourceDict if not i[2] == 'false']

        sourceDict = [(i[0], i[1], i[1].priority) for i in sourceDict]

        threads = []

        if content == 'movie':
            title = self.getTitle(title)
            localtitle = self.getLocalTitle(title, imdb, tvdb, content)
            for i in sourceDict: threads.append(workers.Thread(self.getMovieSource, title, localtitle, year, imdb, i[0], i[1]))
        else:
            tvshowtitle = self.getTitle(tvshowtitle)
            localtvshowtitle = self.getLocalTitle(tvshowtitle, imdb, tvdb, content)
            for i in sourceDict: threads.append(workers.Thread(self.getEpisodeSource, title, year, imdb, tvdb, season, episode, tvshowtitle, localtvshowtitle, premiered, i[0], i[1]))

        s = [i[0] + (i[1],) for i in zip(sourceDict, threads)]
        s = [(i[3].getName(), i[0], i[2]) for i in s]

        mainsourceDict = [i[0] for i in s if i[2] == 0]
        sourcelabelDict = dict([(i[0], i[1].upper()) for i in s])

        [i.start() for i in threads]

        string1 = control.lang(32404).encode('utf-8')
        string2 = control.lang(32405).encode('utf-8')
        string3 = control.lang(32406).encode('utf-8')

        try: timeout = int(control.setting('scrapers.timeout.1'))
        except: pass

        for i in range(0, (timeout * 2) + 60):
            try:
                if xbmc.abortRequested == True: return sys.exit()

                try: info = [sourcelabelDict[x.getName()] for x in threads if x.is_alive() == True]
                except: info = []

                timerange = int(i * 0.5)

                try:
                    if progressDialog.iscanceled(): break
                except:
                    pass
                try:
                    string4 = string1 % str(timerange)
                    if len(info) > 5: string5 = string3 % str(len(info))
                    else: string5 = string3 % str(info).translate(None, "[]'")
                    progressDialog.update(int((100 / float(len(threads))) * len([x for x in threads if x.is_alive() == False])), str(string4), str(string5))
                except:
                    pass

                is_alive = [x.is_alive() for x in threads]
                if all(x == False for x in is_alive): break

                if timerange >= timeout:
                    is_alive = [x for x in threads if x.is_alive() == True and x.getName() in mainsourceDict]
                    if not is_alive: break

                time.sleep(0.5)
            except:
                pass

        try: progressDialog.close()
        except: pass

        self.sourcesFilter()

        return self.sources
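
The loop over range(0, (timeout * 2) + 60) above is a half-second polling loop: it keeps waiting while any scraper thread is alive, and once the timeout is reached it only waits for the threads in mainsourceDict. Stripped of the Kodi dialog handling, the pattern looks roughly like this (a sketch with assumed names, not the addon's API):

import time

def wait_for_threads(threads, main_names, timeout):
    # Poll every half second; after `timeout` seconds, only keep waiting
    # for the "main source" threads.
    for tick in range(timeout * 2 + 60):
        alive = [t for t in threads if t.is_alive()]
        if not alive:
            break
        elapsed = int(tick * 0.5)
        if elapsed >= timeout and not any(t.getName() in main_names for t in alive):
            break
        time.sleep(0.5)
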
Beispiel #42
0
    def tell(self, *args, **kwargs):
        return self.fp.tell(*args, **kwargs)

    @property
    def closed(self):
        return self.fp.closed

    @property
    def name(self):
        return self.fp.name


Relate = namedtuple('Relate', ['fulltext', 'dependencies', 'triples'])
# make this namedtuple class work in a bool context: False iff all
# elements are falsy
Relate.__bool__ = lambda self: any(self)


class DocumentStore(object):
    """Unifies handling of reading and writing of various data files
    during the ``download``, ``parse`` and ``generate`` stages.

    :param datadir: The root directory (including docrepo path
                    segment) where files are stored.
    :type datadir: str
    :param storage_policy: Some repositories have documents in several
                           formats, documents split amongst several
                           files or embedded resources. If
                           ``storage_policy`` is set to ``dir``, then
                           each document gets its own directory (the
                           default filename being ``index`` +suffix),
Beispiel #43
0
 def __call__(self, *args):
     return any(f(*args) for f in self._filters)
Beispiel #44
0
    def update(self, frame_time, tracked_objects):
        self.current_frame_time = frame_time
        # get the new frame and delete the old frame
        frame_id = f"{self.name}{frame_time}"
        self.current_frame = self.frame_manager.get(frame_id)
        if not self.previous_frame_id is None:
            self.frame_manager.delete(self.previous_frame_id)
        self.previous_frame_id = frame_id

        current_ids = tracked_objects.keys()
        previous_ids = self.tracked_objects.keys()
        removed_ids = list(set(previous_ids).difference(current_ids))
        new_ids = list(set(current_ids).difference(previous_ids))
        updated_ids = list(set(current_ids).intersection(previous_ids))

        for id in new_ids:
            self.tracked_objects[id] = tracked_objects[id]
            self.tracked_objects[id]['zones'] = []

            # start the score history
            self.tracked_objects[id]['score_history'] = [self.tracked_objects[id]['score']]

            # calculate if this is a false positive
            self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id])
            self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id])

            # call event handlers
            for c in self.callbacks['start']:
                c(self.name, tracked_objects[id])
        
        for id in updated_ids:
            self.tracked_objects[id].update(tracked_objects[id])

            # if the object is not in the current frame, add a 0.0 to the score history
            if self.tracked_objects[id]['frame_time'] != self.current_frame_time:
                self.tracked_objects[id]['score_history'].append(0.0)
            else:
                self.tracked_objects[id]['score_history'].append(self.tracked_objects[id]['score'])
            # only keep the last 10 scores
            if len(self.tracked_objects[id]['score_history']) > 10:
                self.tracked_objects[id]['score_history'] = self.tracked_objects[id]['score_history'][-10:]

            # calculate if this is a false positive
            self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id])
            self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id])

            # call event handlers
            for c in self.callbacks['update']:
                c(self.name, self.tracked_objects[id])
        
        for id in removed_ids:
            # publish events to mqtt
            self.tracked_objects[id]['end_time'] = frame_time
            for c in self.callbacks['end']:
                c(self.name, self.tracked_objects[id])
            del self.tracked_objects[id]

        # check to see if the objects are in any zones
        for obj in self.tracked_objects.values():
            current_zones = []
            bottom_center = (obj['centroid'][0], obj['box'][3])
            # check each zone
            for name, zone in self.config['zones'].items():
                contour = zone['contour']
                # check if the object is in the zone and not filtered
                if (cv2.pointPolygonTest(contour, bottom_center, False) >= 0 
                    and not zone_filtered(obj, zone.get('filters', {}))):
                    current_zones.append(name)
            obj['zones'] = current_zones
        
        # draw on the frame
        if not self.current_frame is None:
            # draw the bounding boxes on the frame
            for obj in self.tracked_objects.values():
                thickness = 2
                color = COLOR_MAP[obj['label']]
                
                if obj['frame_time'] != frame_time:
                    thickness = 1
                    color = (255,0,0)

                # draw the bounding boxes on the frame
                box = obj['box']
                draw_box_with_label(self.current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color)
                # draw the regions on the frame
                region = obj['region']
                cv2.rectangle(self.current_frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 1)
            
            if self.config['snapshots']['show_timestamp']:
                time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S")
                cv2.putText(self.current_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)

            if self.config['snapshots']['draw_zones']:
                for name, zone in self.config['zones'].items():
                    thickness = 8 if any([name in obj['zones'] for obj in self.tracked_objects.values()]) else 2
                    cv2.drawContours(self.current_frame, [zone['contour']], -1, zone['color'], thickness)

        # maintain best objects
        for obj in self.tracked_objects.values():
            object_type = obj['label']
            # if the object wasn't seen on the current frame, skip it
            if obj['frame_time'] != self.current_frame_time or obj['false_positive']:
                continue
            obj_copy = copy.deepcopy(obj)
            if object_type in self.best_objects:
                current_best = self.best_objects[object_type]
                now = datetime.datetime.now().timestamp()
                # if the object is a higher score than the current best score 
                # or the current object is older than desired, use the new object
                if obj_copy['score'] > current_best['score'] or (now - current_best['frame_time']) > self.config.get('best_image_timeout', 60):
                    obj_copy['frame'] = np.copy(self.current_frame)
                    self.best_objects[object_type] = obj_copy
                    for c in self.callbacks['snapshot']:
                        c(self.name, self.best_objects[object_type])
            else:
                obj_copy['frame'] = np.copy(self.current_frame)
                self.best_objects[object_type] = obj_copy
                for c in self.callbacks['snapshot']:
                    c(self.name, self.best_objects[object_type])
        
        # update overall camera state for each object type
        obj_counter = Counter()
        for obj in self.tracked_objects.values():
            if not obj['false_positive']:
                obj_counter[obj['label']] += 1
                
        # report on detected objects
        for obj_name, count in obj_counter.items():
            new_status = 'ON' if count > 0 else 'OFF'
            if new_status != self.object_status[obj_name]:
                self.object_status[obj_name] = new_status
                for c in self.callbacks['object_status']:
                    c(self.name, obj_name, new_status)

        # expire any objects that are ON and no longer detected
        expired_objects = [obj_name for obj_name, status in self.object_status.items() if status == 'ON' and not obj_name in obj_counter]
        for obj_name in expired_objects:
            self.object_status[obj_name] = 'OFF'
            for c in self.callbacks['object_status']:
                c(self.name, obj_name, 'OFF')
            for c in self.callbacks['snapshot']:
                c(self.name, self.best_objects[obj_name])
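
The zone assignment above boils down to one cv2.pointPolygonTest call per zone on the bottom center of the object's bounding box (the camera code takes the x coordinate from the tracked centroid; this sketch simply uses the box midpoint):

# Sketch only: return the names of all zones whose contour contains the
# bottom center of a bounding box.
import numpy as np
import cv2

def zones_for_box(box, zones):
    """box is (x1, y1, x2, y2); zones maps name -> contour of int32 points."""
    x1, y1, x2, y2 = box
    bottom_center = (int((x1 + x2) / 2), int(y2))
    return [name for name, contour in zones.items()
            if cv2.pointPolygonTest(contour, bottom_center, False) >= 0]

# Example:
# contour = np.array([[0, 0], [100, 0], [100, 100], [0, 100]], dtype=np.int32)
# zones_for_box((10, 10, 30, 40), {"yard": contour})  -> ["yard"]
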
Beispiel #45
0
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    if args.max_steps > 0:
        num_training_steps = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        num_training_steps = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
    args.warmup_steps = int(num_training_steps * args.warmup_proportion)
    
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    
    optimizer = AdamW(params=optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=num_training_steps)
    
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", num_training_steps)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    seed_everything(args.seed)  # Added here for reproducibility (even between python 2 and 3)
    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            print(args.device)
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'labels': batch[3]}
            inputs['token_type_ids'] = batch[2]
            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
            

            if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                #Log metrics
                if args.local_rank == -1:  # Only evaluate when single GPU otherwise metrics may not average well
                    evaluate(args, model, tokenizer)

            # if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                # Save model checkpoint
                # output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
                # if not os.path.exists(output_dir):
                #     os.makedirs(output_dir)
                # model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
                # model_to_save.save_pretrained(output_dir)
                # torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                # logger.info("Saving model checkpoint to %s", output_dir)
            pbar(step, {'loss': loss.item()})
        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()

    output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
    model_to_save.save_pretrained(output_dir)
    torch.save(args, os.path.join(output_dir, 'training_args.bin'))
    logger.info("Saving model checkpoint to %s", output_dir)


    return global_step, tr_loss / global_step
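
A hedged sketch of driving this train helper; the SimpleNamespace fields mirror the attributes the function reads, while the values, train_dataset, model and tokenizer are placeholders assumed to be built elsewhere.

from types import SimpleNamespace

# Hypothetical hyperparameters; only the attribute names are taken from the
# function above, the values and surrounding objects are assumptions.
args = SimpleNamespace(
    per_gpu_train_batch_size=8, n_gpu=1, max_steps=0, num_train_epochs=3,
    gradient_accumulation_steps=1, warmup_proportion=0.1, weight_decay=0.01,
    learning_rate=5e-5, adam_epsilon=1e-8, max_grad_norm=1.0, seed=42,
    device='cuda', local_rank=-1, logging_steps=50, output_dir='outputs')
global_step, avg_loss = train(args, train_dataset, model, tokenizer)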
Beispiel #46
0
def is_word_in_line(word, line):
    return any(string == word[1] for (_, string) in line)
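
A small usage sketch; the exact tuple layout of word and the line elements is an assumption inferred from the indexing and unpacking above.

# Assuming both `word` and each element of `line` are (position, text) pairs.
line = [((0, 0), 'total'), ((0, 1), 'amount'), ((0, 2), 'due')]
print(is_word_in_line((None, 'amount'), line))   # True
print(is_word_in_line((None, 'balance'), line))  # False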
Beispiel #47
0
def plot_epochs_image(epochs, picks=None, sigma=0., vmin=None,
                      vmax=None, colorbar=True, order=None, show=True,
                      units=None, scalings=None, cmap=None, fig=None,
                      axes=None, overlay_times=None, combine=None,
                      group_by=None, evoked=True, ts_args=None, title=None,
                      clear=False):
    """Plot Event Related Potential / Fields image.

    Parameters
    ----------
    epochs : instance of Epochs
        The epochs.
    %(picks_good_data)s
        ``picks`` interacts with ``group_by`` and ``combine`` to determine the
        number of figures generated; see Notes.
    sigma : float
        The standard deviation of a Gaussian smoothing window applied along
        the epochs axis of the image. If 0, no smoothing is applied.
        Defaults to 0.
    vmin : None | float | callable
        The min value in the image (and the ER[P/F]). The unit is µV for
        EEG channels, fT for magnetometers and fT/cm for gradiometers.
        If vmin is None and multiple plots are returned, the limit is
        equalized within channel types.
        Hint: to specify the lower limit of the data, use
        ``vmin=lambda data: data.min()``.
    vmax : None | float | callable
        The max value in the image (and the ER[P/F]). The unit is µV for
        EEG channels, fT for magnetometers and fT/cm for gradiometers.
        If vmax is None and multiple plots are returned, the limit is
        equalized within channel types.
    colorbar : bool
        Whether to display a colorbar.
    order : None | array of int | callable
        If not ``None``, order is used to reorder the epochs along the y-axis
        of the image. If it is an array of :class:`int`, its length should
        match the number of good epochs. If it is a callable it should accept
        two positional parameters (``times`` and ``data``, where
        ``data.shape == (len(good_epochs), len(times))``) and return an
        :class:`array <numpy.ndarray>` of indices that will sort ``data`` along
        its first axis.
    show : bool
        Show figure if True.
    units : dict | None
        The units of the channel types used for axes labels. If None,
        defaults to ``units=dict(eeg='µV', grad='fT/cm', mag='fT')``.
    scalings : dict | None
        The scalings of the channel types to be applied for plotting.
        If None, defaults to ``scalings=dict(eeg=1e6, grad=1e13, mag=1e15,
        eog=1e6)``.
    cmap : None | colormap | (colormap, bool) | 'interactive'
        Colormap. If tuple, the first value indicates the colormap to use and
        the second value is a boolean defining interactivity. In interactive
        mode the colors are adjustable by clicking and dragging the colorbar
        with left and right mouse button. Left mouse button moves the scale up
        and down and right mouse button adjusts the range. Hitting space bar
        resets the scale. Up and down arrows can be used to change the
        colormap. If 'interactive', translates to ('RdBu_r', True).
        If None, "RdBu_r" is used, unless the data is all positive, in which
        case "Reds" is used.
    fig : Figure | None
        :class:`~matplotlib.figure.Figure` instance to draw the image to.
        Figure must contain the correct number of axes for drawing the epochs
        image, the evoked response, and a colorbar (depending on values of
        ``evoked`` and ``colorbar``). If ``None`` a new figure is created.
        Defaults to ``None``.
    axes : list of Axes | dict of list of Axes | None
        List of :class:`~matplotlib.axes.Axes` objects in which to draw the
        image, evoked response, and colorbar (in that order). Length of list
        must be 1, 2, or 3 (depending on values of ``colorbar`` and ``evoked``
        parameters). If a :class:`dict`, each entry must be a list of Axes
        objects with the same constraints as above. If both ``axes`` and
        ``group_by`` are dicts, their keys must match. Providing non-``None``
        values for both ``fig`` and ``axes``  results in an error. Defaults to
        ``None``.
    overlay_times : array_like, shape (n_epochs,) | None
        Times (in seconds) at which to draw a line on the corresponding row of
        the image (e.g., a reaction time associated with each epoch). Note that
        ``overlay_times`` should be ordered to correspond with the
        :class:`~mne.Epochs` object (i.e., ``overlay_times[0]`` corresponds to
        ``epochs[0]``, etc).
    %(combine)s
        If callable, the callable must accept one positional input (data of
        shape ``(n_epochs, n_channels, n_times)``) and return an
        :class:`array <numpy.ndarray>` of shape ``(n_epochs, n_times)``. For
        example::

            combine = lambda data: np.median(data, axis=1)

        If ``combine`` is ``None``, channels are combined by computing GFP,
        unless ``group_by`` is also ``None`` and ``picks`` is a list of
        specific channels (not channel types), in which case no combining is
        performed and each channel gets its own figure. See Notes for further
        details. Defaults to ``None``.
    group_by : None | dict
        Specifies which channels are aggregated into a single figure, with
        aggregation method determined by the ``combine`` parameter. If not
        ``None``, one :class:`~matplotlib.figure.Figure` is made per dict
        entry; the dict key will be used as the figure title and the dict
        values must be lists of picks (either channel names or integer indices
        of ``epochs.ch_names``). For example::

            group_by=dict(Left_ROI=[1, 2, 3, 4], Right_ROI=[5, 6, 7, 8])

        Note that within a dict entry all channels must have the same type.
        ``group_by`` interacts with ``picks`` and ``combine`` to determine the
        number of figures generated; see Notes. Defaults to ``None``.
    evoked : bool
        Draw the ER[P/F] below the image or not.
    ts_args : None | dict
        Arguments passed to a call to `~mne.viz.plot_compare_evokeds` to style
        the evoked plot below the image. Defaults to an empty dictionary,
        meaning `~mne.viz.plot_compare_evokeds` will be called with default
        parameters.
    title : None | str
        If :class:`str`, will be plotted as figure title. Otherwise, the
        title will indicate channel(s) or channel type being plotted. Defaults
        to ``None``.
    clear : bool
        Whether to clear the axes before plotting (if ``fig`` or ``axes`` are
        provided). Defaults to ``False``.

    Returns
    -------
    figs : list of Figure
        One figure per channel, channel type, or group, depending on values of
        ``picks``, ``group_by``, and ``combine``. See Notes.

    Notes
    -----
    You can control how channels are aggregated into one figure or plotted in
    separate figures through a combination of the ``picks``, ``group_by``, and
    ``combine`` parameters. If ``group_by`` is a :class:`dict`, the result is
    one :class:`~matplotlib.figure.Figure` per dictionary key (for any valid
    values of ``picks`` and ``combine``). If ``group_by`` is ``None``, the
    number and content of the figures generated depends on the values of
    ``picks`` and ``combine``, as summarized in this table:

    .. cssclass:: table-bordered
    .. rst-class:: midvalign

    +----------+----------------------------+------------+-------------------+
    | group_by | picks                      | combine    | result            |
    +==========+============================+============+===================+
    |          | None, int, list of int,    | None,      |                   |
    | dict     | ch_name, list of ch_names, | string, or | 1 figure per      |
    |          | ch_type, list of ch_types  | callable   | dict key          |
    +----------+----------------------------+------------+-------------------+
    |          | None,                      | None,      |                   |
    |          | ch_type,                   | string, or | 1 figure per      |
    |          | list of ch_types           | callable   | ch_type           |
    | None     +----------------------------+------------+-------------------+
    |          | int,                       | None       | 1 figure per pick |
    |          | ch_name,                   +------------+-------------------+
    |          | list of int,               | string or  | 1 figure          |
    |          | list of ch_names           | callable   |                   |
    +----------+----------------------------+------------+-------------------+
    """
    from scipy.ndimage import gaussian_filter1d
    from .. import EpochsArray

    _validate_type(group_by, (dict, None), 'group_by')

    units = _handle_default('units', units)
    scalings = _handle_default('scalings', scalings)
    if set(units) != set(scalings):
        raise ValueError('Scalings and units must have the same keys.')

    # is picks a channel type (or None)?
    picks, picked_types = _picks_to_idx(epochs.info, picks, return_kind=True)
    ch_types = _get_channel_types(epochs.info, picks)

    # `combine` defaults to 'gfp' unless picks are specific channels and
    # there was no group_by passed
    combine_given = combine is not None
    if combine is None and (group_by is not None or picked_types):
        combine = 'gfp'
    # convert `combine` into callable (if None or str)
    combine_func = _make_combine_callable(combine)

    # handle ts_args (params for the evoked time series)
    ts_args = dict() if ts_args is None else ts_args
    manual_ylims = 'ylim' in ts_args
    if combine is not None:
        ts_args['show_sensors'] = False
    vlines = [0] if (epochs.times[0] < 0 < epochs.times[-1]) else []
    ts_defaults = dict(colors={'cond': 'k'}, title='', show=False,
                       truncate_yaxis=False, truncate_xaxis=False,
                       vlines=vlines, legend=False)
    ts_defaults.update(**ts_args)
    ts_args = ts_defaults.copy()

    # construct a group_by dict if one wasn't supplied
    if group_by is None:
        if picked_types:
            # one fig per ch_type
            group_by = {ch_type: picks[np.array(ch_types) == ch_type]
                        for ch_type in set(ch_types)
                        if ch_type in _DATA_CH_TYPES_SPLIT}
        elif combine is None:
            # one fig per pick
            group_by = {epochs.ch_names[pick]: [pick] for pick in picks}
        else:
            # one fig to rule them all
            ch_names = np.array(epochs.ch_names)[picks].tolist()
            key = _set_title_multiple_electrodes(None, combine, ch_names)
            group_by = {key: picks}
    else:
        group_by = deepcopy(group_by)
    # check for heterogeneous sensor type combinations / "combining" 1 channel
    for this_group, these_picks in group_by.items():
        this_ch_type = np.array(ch_types)[np.in1d(picks, these_picks)]
        if len(set(this_ch_type)) > 1:
            types = ', '.join(set(this_ch_type))
            raise ValueError('Cannot combine sensors of different types; "{}" '
                             'contains types {}.'.format(this_group, types))
        # now we know they're all the same type...
        group_by[this_group] = dict(picks=these_picks, ch_type=this_ch_type[0],
                                    title=title)

        # are they trying to combine a single channel?
        if len(these_picks) < 2 and combine_given:
            warn('Only one channel in group "{}"; cannot combine by method '
                 '"{}".'.format(this_group, combine))

    # check for compatible `fig` / `axes`; instantiate figs if needed; add
    # fig(s) and axes into group_by
    group_by = _validate_fig_and_axes(fig, axes, group_by, evoked, colorbar,
                                      clear=clear)

    # prepare images in advance to get consistent vmin/vmax.
    # At the same time, create a subsetted epochs object for each group
    data = epochs.get_data()
    vmin_vmax = {ch_type: dict(images=list(), norm=list())
                 for ch_type in set(ch_types)}
    for this_group, this_group_dict in group_by.items():
        these_picks = this_group_dict['picks']
        this_ch_type = this_group_dict['ch_type']
        this_ch_info = [epochs.info['chs'][n] for n in these_picks]
        these_ch_names = np.array(epochs.info['ch_names'])[these_picks]
        this_data = data[:, these_picks]
        # create subsetted epochs object
        this_info = create_info(sfreq=epochs.info['sfreq'],
                                ch_names=list(these_ch_names),
                                ch_types=[this_ch_type] * len(these_picks))
        this_info['chs'] = this_ch_info
        this_epochs = EpochsArray(this_data, this_info, tmin=epochs.times[0])
        # apply scalings (only to image, not epochs object), combine channels
        this_image = combine_func(this_data * scalings[this_ch_type])
        # handle `order`. NB: this can potentially yield different orderings
        # in each figure!
        this_image, _overlay_times = _order_epochs(this_image, epochs.times,
                                                   order, overlay_times)
        this_norm = np.all(this_image > 0)
        # apply smoothing
        if sigma > 0.:
            this_image = gaussian_filter1d(this_image, sigma=sigma, axis=0,
                                           mode='nearest')
        # update the group_by and vmin_vmax dicts
        group_by[this_group].update(image=this_image, epochs=this_epochs,
                                    norm=this_norm)
        vmin_vmax[this_ch_type]['images'].append(this_image)
        vmin_vmax[this_ch_type]['norm'].append(this_norm)

    # compute overall vmin/vmax for images
    for ch_type, this_vmin_vmax_dict in vmin_vmax.items():
        image_list = this_vmin_vmax_dict['images']
        image_stack = np.stack(image_list)
        norm = all(this_vmin_vmax_dict['norm'])
        vmin_vmax[ch_type] = _setup_vmin_vmax(image_stack, vmin, vmax, norm)
    del image_stack, vmin, vmax

    # prepare to plot
    auto_ylims = {ch_type: [0., 0.] for ch_type in set(ch_types)}

    # plot
    for this_group, this_group_dict in group_by.items():
        this_ch_type = this_group_dict['ch_type']
        this_axes_dict = this_group_dict['axes']
        vmin, vmax = vmin_vmax[this_ch_type]

        # plot title
        if this_group_dict['title'] is None:
            title = _handle_default('titles').get(this_group, this_group)
            if isinstance(combine, str) and len(title):
                _comb = combine.upper() if combine == 'gfp' else combine
                _comb = 'std. dev.' if _comb == 'std' else _comb
                title += f' ({_comb})'

        # plot the image
        this_fig = _plot_epochs_image(
            this_group_dict['image'], epochs=this_group_dict['epochs'],
            picks=picks, colorbar=colorbar, vmin=vmin, vmax=vmax, cmap=cmap,
            style_axes=True, norm=this_group_dict['norm'],
            unit=units[this_ch_type], ax=this_axes_dict, show=False,
            title=title, combine=combine, combine_given=combine_given,
            overlay_times=_overlay_times, evoked=evoked, ts_args=ts_args)
        group_by[this_group].update(fig=this_fig)

        # detect ylims across figures
        if evoked and not manual_ylims:
            # ensure get_ylim works properly
            this_axes_dict['evoked'].figure.canvas.draw_idle()
            this_bot, this_top = this_axes_dict['evoked'].get_ylim()
            this_min = min(this_bot, this_top)
            this_max = max(this_bot, this_top)
            curr_min, curr_max = auto_ylims[this_ch_type]
            auto_ylims[this_ch_type] = [min(curr_min, this_min),
                                        max(curr_max, this_max)]

    # equalize ylims across figures (does not adjust ticks)
    if evoked:
        for this_group_dict in group_by.values():
            ax = this_group_dict['axes']['evoked']
            ch_type = this_group_dict['ch_type']
            if not manual_ylims:
                args = auto_ylims[ch_type]
                if 'invert_y' in ts_args:
                    args = args[::-1]
                ax.set_ylim(*args)
    plt_show(show)
    # impose deterministic order of returned objects
    return_order = np.array(sorted(group_by))
    are_ch_types = np.in1d(return_order, _VALID_CHANNEL_TYPES)
    if any(are_ch_types):
        return_order = np.concatenate((return_order[are_ch_types],
                                       return_order[~are_ch_types]))
    return [group_by[group]['fig'] for group in return_order]
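
A hedged usage sketch, assuming an existing mne.Epochs instance named epochs; the picks value and the ROI channel indices are placeholders.

# One figure for all EEG channels, combined by their mean.
figs = plot_epochs_image(epochs, picks='eeg', sigma=1., combine='mean')

# One figure per ROI, following the group_by example from the docstring;
# the channel indices here are placeholders.
roi_figs = plot_epochs_image(
    epochs,
    group_by=dict(Left_ROI=[1, 2, 3, 4], Right_ROI=[5, 6, 7, 8]),
    combine='gfp')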
Beispiel #48
0
def plot_drop_log(drop_log, threshold=0, n_max_plot=20, subject='Unknown subj',
                  color=(0.8, 0.8, 0.8), width=0.8, ignore=('IGNORED',),
                  show=True):
    """Show the channel stats based on a drop_log from Epochs.

    Parameters
    ----------
    drop_log : list of list
        Epoch drop log from Epochs.drop_log.
    threshold : float
        The percentage threshold to use to decide whether or not to
        plot. Default is zero (always plot).
    n_max_plot : int
        Maximum number of channels to show stats for.
    subject : str | None
        The subject name to use in the title of the plot. If ``None``, do not
        display a subject name.

        .. versionchanged:: 0.23
           Added support for ``None``.
    color : tuple | str
        Color to use for the bars.
    width : float
        Width of the bars.
    ignore : list
        The drop reasons to ignore.
    show : bool
        Show figure if True.

    Returns
    -------
    fig : instance of matplotlib.figure.Figure
        The figure.
    """
    import matplotlib.pyplot as plt
    from ..epochs import _drop_log_stats
    percent = _drop_log_stats(drop_log, ignore)
    if percent < threshold:
        logger.info('Percent dropped epochs < supplied threshold; not '
                    'plotting drop log.')
        return
    scores = Counter([ch for d in drop_log for ch in d if ch not in ignore])
    ch_names = np.array(list(scores.keys()))
    counts = np.array(list(scores.values()))
    # init figure, handle easy case (no drops)
    fig, ax = plt.subplots()
    title = f'{percent:.1f}% of all epochs rejected'
    if subject is not None:
        title = f'{subject}: {title}'
    ax.set_title(title)
    if len(ch_names) == 0:
        ax.text(0.5, 0.5, 'No drops', ha='center', fontsize=14)
        return fig
    # count epochs that aren't fully caught by `ignore`
    n_used = sum([any(ch not in ignore for ch in d) or len(d) == 0
                  for d in drop_log])
    # calc plot values
    n_bars = min(n_max_plot, len(ch_names))
    x = np.arange(n_bars)
    y = 100 * counts / n_used
    order = np.flipud(np.argsort(y))
    ax.bar(x, y[order[:n_bars]], color=color, width=width, align='center')
    ax.set_xticks(x)
    ax.set_xticklabels(ch_names[order[:n_bars]], rotation=45, size=10,
                       horizontalalignment='right')
    ax.set_ylabel('% of epochs rejected')
    ax.grid(axis='y')
    tight_layout(pad=1, fig=fig)
    plt_show(show)
    return fig
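
A hedged sketch with a hand-made drop log in the "list of list" format described above; real values would come from Epochs.drop_log.

# Toy drop log: one entry per epoch listing the channels/reasons for the drop;
# 'IGNORED' entries are skipped via the default `ignore` parameter.
drop_log = [[], ['EEG 001'], ['IGNORED'], ['EEG 001', 'EEG 002'], []]
fig = plot_drop_log(drop_log, subject='sub-01', n_max_plot=10)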
Beispiel #49
0
 def ignore_cec(self):
     """ Returns whether the CEC data should be ignored. """
     return self.device is not None and \
         any([fnmatch.fnmatchcase(self.device.friendly_name, pattern)
              for pattern in IGNORE_CEC])
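
For context, a minimal sketch of the shell-style matching this property relies on; the IGNORE_CEC patterns and the friendly name are made-up values.

import fnmatch

IGNORE_CEC = ['Chromecast Audio*', 'Living Room *']  # hypothetical patterns

friendly_name = 'Living Room TV'
print(any(fnmatch.fnmatchcase(friendly_name, pattern)
          for pattern in IGNORE_CEC))  # True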
Beispiel #50
0
def create_stack(conn, stack_name, tpl_file, config, update=False, dry=False,
                 follow=False, create_on_update=False):
    '''Create or update CloudFormation stack from a jinja2 template'''
    tpl, metadata = gen_template(tpl_file, config)

    # Set default tags which cannot be overwritten
    default_tags = [
        {'Key': 'Env',
         'Value': config['env']},
        {'Key': 'MD5Sum',
         'Value': _calc_md5(tpl)}
    ]

    if metadata:
        # Tags are specified in the format
        # tags:
        #  - key: <key>
        #    value: <value>
        # in metadata, so we have to rebuild that list with the 'key' and
        # 'value' keys capitalised (which is how Cloudformation wants them)
        tags = [{'Key': tag['key'], 'Value': tag['value']} for tag in metadata.get('tags', [])]
        tags.extend(default_tags)
        name_from_metadata = metadata.get('name')
        disable_rollback = metadata.get('disable_rollback')
        if disable_rollback is None:
            disable_rollback = False
    else:
        name_from_metadata = None
        tags = default_tags
        disable_rollback = False

    if stack_name:
        sn = stack_name
    elif name_from_metadata:
        sn = name_from_metadata
    else:
        print('Stack name must be specified via command line argument or stack metadata.')
        sys.exit(1)

    tpl_size = len(tpl)

    if dry:
        print(tpl, flush=True)
        print('Name: {}'.format(sn), file=sys.stderr, flush=True)
        print('Tags: {}'.format(', '.join(['{}={}'.format(tag['Key'], tag['Value']) for tag in tags])), file=sys.stderr, flush=True)
        print('Template size:', tpl_size, file=sys.stderr, flush=True)
        return True

    stack_args = {
        'StackName': sn,
        "Tags": tags,
        "Capabilities": ['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM'],
        "DisableRollback": disable_rollback
    }

    if tpl_size > 51200:
        stack_args['TemplateURL'] = upload_template(conn, config, tpl, sn)
    else:
        stack_args['TemplateBody'] = tpl

    try:
        if update and create_on_update and not stack_exists(conn, sn):
            conn.create_stack(**stack_args)
        elif update:
            # Can't disable rollback when updating
            del stack_args['DisableRollback']
            conn.update_stack(**stack_args)
        else:
            conn.create_stack(**stack_args)
        if follow:
            get_events(conn, sn, follow, 10)
    except botocore.exceptions.ClientError as err:
        # Do not exit with 1 when one of the below messages are returned
        non_error_messages = [
            'No updates are to be performed',
            'already exists',
        ]
        if any(s in str(err) for s in non_error_messages):
            print(str(err))
            sys.exit(0)
        print(str(err))
        sys.exit(1)
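
A hedged dry-run sketch; the boto3 client and the template/config values are assumptions, and only the 'env' key is actually required by the default-tag logic above (gen_template may need more).

import boto3

conn = boto3.client('cloudformation')
config = {'env': 'staging'}  # hypothetical; gen_template() may expect more keys
create_stack(conn, 'my-service-staging', 'templates/service.yaml.j2',
             config, dry=True)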
Beispiel #51
0
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese."
    )
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    ## Other parameters
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        default=False,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        default=False,
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        default=False,
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        default=False,
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")

    args = parser.parse_args()

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
    }

    num_labels_task = {
        "cola": 2,
        "mnli": 3,
        "mrpc": 2,
    }

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = int(args.train_batch_size /
                                args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    num_labels = num_labels_task[task_name]
    label_list = processor.get_labels()

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps * args.num_train_epochs)

    # Prepare model
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model,
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
        'distributed_{}'.format(args.local_rank),
        num_labels=num_labels)
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    t_total = num_train_steps
    if args.local_rank != -1:
        t_total = t_total // torch.distributed.get_world_size()
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer,
                                       static_loss_scale=args.loss_scale)

    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=t_total)

    global_step = 0
    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss = model(input_ids, segment_ids, input_mask, label_ids)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    # modify learning rate with special warm up BERT uses
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / t_total, args.warmup_proportion)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1

    # Save a trained model
    model_to_save = model.module if hasattr(
        model, 'module') else model  # Only save the model itself
    output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
    torch.save(model_to_save.state_dict(), output_model_file)

    # Load a trained model that you have fine-tuned
    model_state_dict = torch.load(output_model_file)
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model, state_dict=model_state_dict)
    model.to(device)

    if args.do_eval and (args.local_rank == -1
                         or torch.distributed.get_rank() == 0):
        eval_examples = processor.get_dev_examples(args.data_dir)
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            with torch.no_grad():
                tmp_eval_loss = model(input_ids, segment_ids, input_mask,
                                      label_ids)
                logits = model(input_ids, segment_ids, input_mask)

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            tmp_eval_accuracy = accuracy(logits, label_ids)

            eval_loss += tmp_eval_loss.mean().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += input_ids.size(0)
            nb_eval_steps += 1

        eval_loss = eval_loss / nb_eval_steps
        eval_accuracy = eval_accuracy / nb_eval_examples

        result = {
            'eval_loss': eval_loss,
            'eval_accuracy': eval_accuracy,
            'global_step': global_step,
            'loss': tr_loss / nb_tr_steps if args.do_train else None
        }

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
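
A hedged way to drive main() programmatically; the script name and all paths are placeholders, only the flags mirror the argparse definitions above.

import sys

sys.argv = [
    'run_classifier.py',              # hypothetical module name
    '--data_dir', 'glue_data/MRPC',   # hypothetical paths
    '--bert_model', 'bert-base-uncased',
    '--task_name', 'mrpc',
    '--output_dir', 'out/mrpc',
    '--do_train', '--do_eval', '--do_lower_case',
]
main()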
Beispiel #52
0
def export_work(data_manager, objects, data, file_info, export_time, cc_only_inside_sources, worker_progress_signal):
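    """Write the selected objects' raw traces, processed traces and computed
    features to an .xlsx workbook via xlsxwriter, reporting progress through
    worker_progress_signal. Returns True on success, False otherwise."""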
    try:
        progress = 0
        # get object indices list from binary list
        objects_indices = []
        for index, object_ in enumerate(objects):
            if object_:
                objects_indices.append(index)

        # sort objects such that they are grouped by source
        objects_indices_by_source = [
            [object_index for object_index in objects_indices if data_manager.objects[object_index].source is source]
            for source in data_manager.sources]

        # ungroup objects (but sorted now)
        objects_indices = []
        for objects_indices_ in objects_indices_by_source:
            for object_index in objects_indices_:
                objects_indices.append(object_index)

        # raw and processed?
        raw_cell_mean = data[0]
        processed = data[1]
        
        # get feature indices from binary list
        feature_indices = []
        for index, feature in enumerate(data[2:]):
            if feature:
                feature_indices.append(index)

        # get the current pipeline
        pipeline = data_manager.getCurrentPipeline()
        pipeline_array = pipeline.getCalculatingPipeline()

        # get the active states of all features grouped by pipeline
        active_states = [data_manager.objects[object_index].pipeline.getCalculatingActiveStates() for object_index in objects_indices]

        # get the feature indices whose features are not all inactive
        active_feature_indices = [feature_index for feature_index in feature_indices if any([active_state[feature_index] for active_state in active_states])]
        
        # get cc indices
        spike_cc_index = pipeline_array.index(pipeline._spike_cross_correlation)
        amplitude_cc_index = pipeline_array.index(pipeline._amplitude_cross_correlation)

        # get source index for every object
        sources_indices = [data_manager.sources.index(data_manager.objects[object_index].source) for object_index in objects_indices]

        # get the pipeline outputs, method names and parameters grouped by pipeline
        outputs = [
            [step.output for step in data_manager.objects[object_index].pipeline.getCalculatingPipeline()]
            for object_index in objects_indices]
        method_names = [
            [step.getMethod().name for step in data_manager.objects[object_index].pipeline.getCalculatingPipeline()]
            for object_index in objects_indices]
        parameters = [
            [
                {method.name: method.parameters for method in step.methods.values()}
                for step in data_manager.objects[object_index].pipeline.getCalculatingPipeline()]
            for object_index in objects_indices]

        # get the object names
        names = [data_manager.objects[object_index].name for object_index in objects_indices]

        # get the adjusted frequencies, if not available get original
        frequencies = [data_manager.sources[source_index].getFrequency() for source_index in sources_indices]

        # get secondsRange and frameRange for every object
        seconds_ranges = [data_manager.sources[source_index].secondsRange() for source_index in sources_indices]
        frame_ranges = [data_manager.sources[source_index].frameRange() for source_index in sources_indices]

        # get movement correction for every object
        movement_corrections = [
            None if data_manager.movement_corrections[source] is None
            else data_manager.movement_corrections[source].methods['Movement Correction'].getParameters()['correction']
            for source in sources_indices]

        progress += 1
        worker_progress_signal.emit(progress)

        # creating the excel file
        
        workbook = xlsxwriter.Workbook(file_info.absoluteFilePath())
        wrap = workbook.add_format({'text_wrap': True})

        # metadata
        
        metadata = workbook.add_worksheet('metadata')

        row = 0
        metadata.write(row, 0, 'Date of export:')
        metadata.write(row, 1, time.strftime('%x', export_time))
        row += 1
        metadata.write(row, 0, 'Time of export:')
        metadata.write(row, 1, time.strftime('%X', export_time))
        row += 1

        for col, header in enumerate([
            'object index',
            'name',
            'active',
            'source name',
            'source unit',
            'source recording frequency',
            'source start frame',
            'source end frame',
            'source offset',
            'source movement correction',
            'position',
            'angle',
            'size',
            'invert',
            'ellipse mode'
        ]):
            metadata.write(row, col, header)

        for feature_index in active_feature_indices:
            col += 1
            metadata.write(row, col, pipeline_array[feature_index].name + '\nMethod')
            col += 1
            metadata.write(row, col, pipeline_array[feature_index].name + '\nParameters')

        for index, object_index in enumerate(objects_indices):
            row += 1
            object_data = data_manager.objects[object_index]
            for col, content in enumerate([
                index + 1,
                object_data.name,
                str(object_data.active),
                object_data.source.name,
                object_data.source.unit,
                object_data.source.original_frequency,
                object_data.source.start,
                object_data.source.end,
                object_data.source.offset,
                movement_corrections[index],
                str(object_data.pos),
                str(object_data.angle),
                str(object_data.size),
                str(object_data.invert),
                str(object_data.ellipse_mode)
            ]):
                metadata.write(row, col, content)
                
            for feature_index in active_feature_indices:
                if active_states[index][feature_index]:
                    col += 1
                    method_name = method_names[index][feature_index]
                    metadata.write(row, col, method_name)
                    col += 1
                    metadata.write(row, col, str(parameters[index][feature_index][method_name]))
                else:
                    col += 2

        progress += 1
        worker_progress_signal.emit(progress)

        # raw

        if raw_cell_mean:
            cell_mean = workbook.add_worksheet('Raw')
            cell_mean_data = [data_manager.objects[object_index].cell_mean for object_index in objects_indices]
            for index, cell_mean_data_ in enumerate(cell_mean_data):
                if cell_mean_data_ is None:
                    cell_mean_data[index] = data_manager.sources[sources_indices[index]].getData()
            writeFeature(cell_mean, wrap, 0, names, cell_mean_data, 'Raw', x_axis=True, x_axis_label='Time (frame)', x_axis_values=frame_ranges)
            progress += 1
            worker_progress_signal.emit(progress)

        # processed

        if processed:
            processed = workbook.add_worksheet('Processed')
            processed_data = [data_manager.objects[object_index].processed for object_index in objects_indices]
            writeFeature(processed, wrap, 0, names, processed_data, 'Processed', x_axis=True, x_axis_label='Time (s)', x_axis_values=seconds_ranges)
            progress += 1
            worker_progress_signal.emit(progress)

        # features, except for cc
        index = 0
        for feature_index in feature_indices:

            # ignore cc for now
            if feature_index in [spike_cc_index, amplitude_cc_index]:
                continue

            # only do stuff (add worksheet and fill) if any of the features is active, and never create for AdjustFrequency
            if feature_index in active_feature_indices and pipeline_array[feature_index].name != 'Adjust Frequency':
                col = 0
                worksheet = workbook.add_worksheet(pipeline_array[feature_index].name)

                for (output_key, label, use_seconds_as_x) in [
                    ('background mean', 'Background Mean', False),
                    ('baseline', 'Baseline', False),
                    ('train', 'Train', True)
                ]:
                    if output_key in pipeline_array[feature_index].output.keys():
                        objects_indices_ = [object_index for object_index in range(len(objects_indices)) if active_states[object_index][feature_index] and outputs[object_index][feature_index][output_key] is not None]
                        objects_names = [names[object_index] for object_index in objects_indices_]
                        data_ = [outputs[object_index][feature_index][output_key] for object_index in objects_indices_]
                        x_axis_values = [seconds_ranges[object_index] if use_seconds_as_x else frame_ranges[object_index] for object_index in objects_indices_]
                        x_axis_label = 'Time (s)' if use_seconds_as_x else 'Time (frames)'
                        col = writeFeature(worksheet, wrap, col, objects_names, data_, label, x_axis=True, x_axis_label=x_axis_label, x_axis_values=x_axis_values)

                for (output_key, label) in [
                    ('mean shape', 'Mean Shape'),
                    ('mean shape smoothed', 'Mean Shape Smoothed')
                ]:
                    if output_key in pipeline_array[feature_index].output.keys():
                        objects_indices_ = [object_index for object_index in range(len(objects_indices)) if active_states[object_index][feature_index] and outputs[object_index][feature_index][output_key] is not None]
                        objects_names = [names[object_index] for object_index in objects_indices_]
                        data_ = [outputs[object_index][feature_index][output_key] for object_index in objects_indices_]
                        x_axis_values = []
                        for index, object_index in enumerate(objects_indices_):
                            left,right = parameters[object_index][feature_index][method_names[object_index][feature_index]]['interval']
                            data_len = len(data_[index])
                            x_axis_values.append(np.linspace(-left / 1000.0, right / 1000.0, num=data_len))
                        col = writeFeature(worksheet,wrap, col, objects_names, data_, label, x_axis=True, x_axis_label='Time (s)', x_axis_values=x_axis_values)

                if 'psd' in pipeline_array[feature_index].output.keys():
                    objects_indices_ = [object_index for object_index in range(len(objects_indices)) if active_states[object_index][feature_index] and outputs[object_index][feature_index]['psd'] is not None]
                    objects_names = [names[object_index] for object_index in objects_indices_]
                    data_ = [outputs[object_index][feature_index]['psd'] for object_index in objects_indices_]
                    x_axis_values = [outputs[object_index][feature_index]['frequencies'] for object_index in objects_indices_]
                    col = writeFeature(worksheet, wrap, col, objects_names, data_, 'PSD', x_axis=True, x_axis_label='Frequency (Hz)', x_axis_values=x_axis_values)

                if col != 0:
                    col += 2

                for (output_key, label)  in [
                    ('noise_std', 'Standard Deviation of Noise'),
                    ('time', 'Time of Peak (s)'),
                    ('amplitude', 'Amplitude of Peak'),
                    ('duration', 'Duration (s)'),
                    ('max power frequency', 'Frequency of Max PSD Value (Hz)'),
                    ('max power', 'Max PSD Value'),
                    ('tPeak', 'Time until peak ("tPeak") (s)'),
                    ('aMax', 'Peak amplitude minus base ("aMax")'),
                    ('τDecay', 'Decay time constant ("τDecay")'),
                    ('spike frequency', 'Spike Frequency (#Spikes / second)'),
                    ('burst frequency', 'Burst Frequency (#Bursts / second)')
                ]:
                    if output_key in pipeline_array[feature_index].output.keys():
                        objects_indices_ = [object_index for object_index in range(len(objects_indices)) if active_states[object_index][feature_index] and outputs[object_index][feature_index][output_key] is not None]
                        objects_names = [names[object_index] for object_index in objects_indices_]
                        data_ = [outputs[object_index][feature_index][output_key] for object_index in objects_indices_]
                        if output_key in ['time', 'duration', 'tPeak']:
                            freqs = [frequencies[object_index] for object_index in objects_indices_]
                            data_ = [d / freqs[idx] for idx,d in enumerate(data_)]
                        col = writeFeature(worksheet, wrap, col, objects_names, data_, label, x_axis=False)
                        col += 2

                index += 1
            
            # update progress for every feature that was selected
            progress += 1
            worker_progress_signal.emit(progress)

        # cc

        for feature_index in [spike_cc_index, amplitude_cc_index]:
            # check if feature was selected
            if feature_index in feature_indices:
                
                feature = pipeline_array[feature_index]

                # only do stuff (create worksheet and fill) if there is output
                if feature.active and len([out for out in feature.output.values() if out is None]) == 0:
                    cross_correlation = workbook.add_worksheet(pipeline_array[feature_index].name)
                    row = 0
                    col = 0

                    cc_objects_names = [data_['name'] for data_ in feature.input[feature.input_data_name] if 'train' not in data_ or data_['train'] is not None]
                    if cc_only_inside_sources:
                        cc_objects = []
                        for name in cc_objects_names:
                            for object_ in data_manager.objects:
                                if object_.name == name:
                                    cc_objects.append(object_)
                                    break
                    n = len(cc_objects_names)
                    combinations = []
                    for i in range(n-1):
                        for j in range(i+1, n):
                            if not cc_only_inside_sources or cc_objects[i].source is cc_objects[j].source:
                                combinations.append((i,j))
                    
                    cc_names = ['{}\n{}'.format(cc_objects_names[i], cc_objects_names[j]) for (i,j) in combinations]
                    data_ = [feature.output['correlation'][i,j] for (i,j) in combinations]
                    x_axis_values = [feature.output['xrange'] for _ in combinations]
                    col = writeFeature(cross_correlation, wrap, col, cc_names, data_, 'Correlation', x_axis=True, x_axis_label='Lag (s)', x_axis_values=x_axis_values)
                    col += 2
                    data_ = [feature.output['coefficient'][i,j] for (i,j) in combinations]
                    col = writeFeature(cross_correlation, wrap, col, cc_names, data_, 'Correlation Coefficient', x_axis=False)
                    col += 2
                    title = 'Main Lag' if feature_index == amplitude_cc_index else 'Center of Bin that contains Main Lag'
                    data_ = [feature.output['delay'][i,j] for (i,j) in combinations]
                    col = writeFeature(cross_correlation, wrap, col, cc_names, data_, title, x_axis=False)
                    if feature_index == amplitude_cc_index:
                        col += 2
                        data_ = [feature.output['delay coefficient'][i,j] for (i,j) in combinations]
                        col = writeFeature(cross_correlation, wrap, col, cc_names, data_, 'Correlation Coefficient at Main Lag', x_axis=False)

                # update progress for every feature that was selected
                progress += 1
                worker_progress_signal.emit(progress)

        # finish xlsx file
        try:
            workbook.close()
            result = True
        except IOError:
            result = False
        progress += 1
        worker_progress_signal.emit(progress)
    except (SystemExit, KeyboardInterrupt):
        raise
    except:
        result = False
        traceback.print_exc()

    return result
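For reference, the nested index loop that builds the cross-correlation pairs above can also be written with itertools.combinations. A minimal, self-contained sketch; the Obj class and the shared source objects are illustrative stand-ins, and the identity check (is) mirrors the one used in the example:

from itertools import combinations

class Obj:
    def __init__(self, name, source):
        self.name, self.source = name, source

# Two objects share a source, one does not.
src1, src2 = object(), object()
cc_objects = [Obj('a', src1), Obj('b', src1), Obj('c', src2)]
cc_only_inside_sources = True

pairs = [(i, j) for i, j in combinations(range(len(cc_objects)), 2)
         if not cc_only_inside_sources or cc_objects[i].source is cc_objects[j].source]
# -> [(0, 1)]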
Beispiel #53
0
      if retry:
        if not os.getenv("CI"):
          print("scons build failed, cleaning in")
          for i in range(3, -1, -1):
            print("....%d" % i)
            time.sleep(1)
          subprocess.check_call(["scons", "-c"], cwd=BASEDIR, env=env)
          shutil.rmtree("/tmp/scons_cache")
        else:
          print("scons build failed after retry")
          sys.exit(1)
      else:
        # Build failed, log the errors
        errors = [line.decode('utf8', 'replace') for line in compile_output
                  if any([err in line for err in [b'error: ', b'not found, needed by target']])]
        error_s = "\n".join(errors)
        add_logentries_handler(cloudlog)
        cloudlog.error("scons build failed\n" + error_s)

        # Show TextWindow
        no_ui = __name__ != "__main__" or not ANDROID
        error_s = "\n \n".join(["\n".join(textwrap.wrap(e, 65)) for e in errors])
        with TextWindow("openpilot failed to build\n \n" + error_s, noop=no_ui) as t:
          t.wait_for_exit()

        exit(1)
    else:
      break

import cereal
Beispiel #54
0
def ImportAedatDataVersion1or2(aedat):
    """
    Import event data from an AEDAT version 1 or 2 file into the aedat dict.
    """

    # unpack    
    info = aedat['info']
    importParams = aedat['importParams']
    fileHandle = importParams['fileHandle']

    # The formatVersion dictates whether there are 6 or 8 bytes per event.
    if info['fileFormat'] == 1:
        numBytesPerEvent = 6
        addrPrecision = np.dtype([('addr', '>u2'), ('ts', '>u4')])
    else:
        numBytesPerEvent = 8
        addrPrecision = np.dtype([('addr', '>u4'), ('ts', '>u4')])

    # Find the number of events, assuming that the file position is just at the
    # end of the headers.
    fileHandle.seek(0, 2)
    info['numEventsInFile'] = int(np.floor(
        (fileHandle.tell() - info['beginningOfDataPointer']) /
        numBytesPerEvent))

    # Check the startEvent and endEvent parameters
    if 'startEvent' in importParams:
        startEvent = importParams['startEvent']
    else:
        startEvent = 0
    assert startEvent <= info['numEventsInFile']
    if 'endEvent' in importParams:
        endEvent = importParams['endEvent']
    else:
        endEvent = info['numEventsInFile']
    assert endEvent <= info['numEventsInFile']    
    if 'startPacket' in importParams:
        print("The startPacket parameter is set, but range by packets is not "
              "available for .aedat version < 3 files")
    if 'endPacket' in importParams:
        print("The endPacket parameter is set, but range by packets is not "
              "available for .aedat version < 3 files")
    assert startEvent <= endEvent

    numEventsToRead = endEvent - startEvent + 1

    # Read events
    print('Reading events ...')
    fileHandle.seek(info['beginningOfDataPointer'] + numBytesPerEvent *
                     startEvent)
    allEvents = np.fromfile(fileHandle, addrPrecision, numEventsToRead)

    allAddr = np.array(allEvents['addr'])
    allTs = np.array(allEvents['ts'])

    # Trim events outside time window.
    # This is an inefficient implementation, which allows for non-monotonic
    # timestamps.

    if 'startTime' in importParams:
        print('Cropping events by time ...')
        tempIndex = np.nonzero(allTs >= importParams['startTime'] * 1e6)
        allAddr = allAddr[tempIndex]
        allTs = allTs[tempIndex]

    if 'endTime' in importParams:
        print('Cropping events by time ...')
        tempIndex = np.nonzero(allTs <= importParams['endTime'] * 1e6)
        allAddr = allAddr[tempIndex]
        allTs = allTs[tempIndex]

    # Interpret the addresses
    
    """
    Split between DVS/DAVIS and DAS.
        For DAS1:
            - Special events - external injected events has never been
            implemented for DAS
            - Split between Address events and ADC samples
            - Intepret address events
            - Interpret ADC samples
        For DVS128:
            - Special events - external injected events are on bit 15 = 1 
            there is a more general label for special events which is bit 31 =
            1, but this has ambiguous interpretations  it is also overloaded
            for the stereo pair encoding - ignore this. 
            - Intepret address events
        For DAVIS:
            - Special events
                - Interpret IMU events from special events
            - Interpret DVS events according to chip class
            - Interpret APS events according to chip class
    """
    
    """
        # DAVIS. In the 32-bit address:
        # bit 32 (1-based) being 1 indicates an APS sample
        # bit 11 (1-based) being 1 indicates a special event
        # bits 11 and 32 (1-based) both being zero signals a polarity event
    """

    # Create a structure to put all the data in 
    outputData = {}

    if info['source'] == 'Das1':

        # To do: DAS
        pass
    
    elif info['source'] == 'Dvs128':
    
        # To do: Dvs128
        pass
    
    else: # DAVIS
    
        """ 
        In the 32-bit address:
        bit 32 (1-based) being 1 indicates an APS sample
        bit 11 (1-based) being 1 indicates a special event 
        bits 11 and 32 (1-based) both being zero signals a polarity event
        """

        print('Building logical indices by type ...') 
        apsOrImuMask = int('80000000', 16)
        apsOrImuLogical = np.bitwise_and(allAddr, apsOrImuMask)
        apsOrImuLogical = apsOrImuLogical.astype(bool)
        signalOrSpecialMask = int('400', 16)
        signalOrSpecialLogical = np.bitwise_and(allAddr, signalOrSpecialMask)
        signalOrSpecialLogical = signalOrSpecialLogical.astype(bool)

        # These masks are used for both frames and polarity events, so are defined
        # outside of the following if statement
        yMask = int('7FC00000', 16)
        yShiftBits = 22
        xMask = int('003FF000', 16)
        xShiftBits = 12        
        polarityMask = int('00000800', 16)
        
        specialLogical = np.logical_and(signalOrSpecialLogical,
                                       np.logical_not(apsOrImuLogical))
        # Special events
        if ('dataTypes' not in importParams or 'special' in importParams['dataTypes']) \
                 and any(specialLogical):
            print('Processing special events ...')
            outputData['special'] = {}
            outputData['special']['timeStamp'] = allTs[specialLogical] 
            # No need to create address field, since there is only one type of special event
        del specialLogical
    
        polarityLogical = np.logical_and(np.logical_not(apsOrImuLogical),
                                      np.logical_not(signalOrSpecialLogical))
        # Polarity(DVS) events
        if ('dataTypes' not in importParams or 'polarity' in importParams['dataTypes']) \
                and any(polarityLogical):
            print('Processing polarity events ...')
            polarityData = allAddr[polarityLogical]         
            outputData['polarity'] = {}
            outputData['polarity']['timeStamp'] = allTs[polarityLogical]
            # Y addresses
            outputData['polarity']['y'] = np.array(np.right_shift( \
                np.bitwise_and(polarityData, yMask), yShiftBits), 'uint16')
            # X addresses
            outputData['polarity']['x'] = np.array(np.right_shift( \
                np.bitwise_and(polarityData, xMask), xShiftBits), 'uint16')
            # Polarity bit
            
            # Note: no need for a bitshift here, since it's converted to boolean anyway
            outputData['polarity']['polarity'] = np.array( \
            np.bitwise_and(polarityData, polarityMask), 'bool')
            del polarityData
        del polarityLogical


        ImuOrPolarityMask = int('800', 16)
        ImuOrPolarityLogical = np.bitwise_and(allAddr, ImuOrPolarityMask)
        ImuOrPolarityLogical = ImuOrPolarityLogical.astype(bool)
        frameLogical = np.logical_and(apsOrImuLogical,
                                     np.logical_not(ImuOrPolarityLogical))
        # Frame events
        if ('dataTypes' not in importParams or 'frame' in importParams['dataTypes']) \
                and any(frameLogical):
            print('Processing frames ...')
            frameSampleMask = int('1111111111', 2) 
            
            frameData = allAddr[frameLogical] 
            frameTs = allTs[frameLogical] 
    
            # Note: uses int16 instead of uint16 to allow for a subtraction operation below to look for discontinuities
            frameX = np.array(np.right_shift(np.bitwise_and(frameData, xMask), xShiftBits), 'int16') 
            frameY = np.array(np.right_shift(np.bitwise_and(frameData, yMask), yShiftBits), 'int16') 
            frameSample = np.array(np.bitwise_and(frameData, frameSampleMask), 'uint16') 
            # Note: no need for a bitshift here, since it's converted to boolean anyway
            frameSignal = np.array(np.bitwise_and(frameData, signalOrSpecialMask), 'bool') 
            
             # In general the ramp of address values could be in either
             # direction and either x or y could be the outer(inner) loop
             # Search for a discontinuity in both x and y simultaneously
            frameXDiscont = abs(frameX[1 : ] - frameX[0 : -1]) > 1 
            frameYDiscont = abs(frameY[1 : ] - frameY[0 : -1]) > 1
            frameDiscontIndex = np.where(np.logical_and(frameXDiscont, frameYDiscont))
            frameDiscontIndex = frameDiscontIndex[0] # The last line produces a tuple - we only want the array
            frameStarts = np.concatenate([[0], frameDiscontIndex  + 1, [frameData.size]])
             # Now we have the indices of the first sample in each frame, plus
             # an additional index just beyond the end of the array
            numFrames = frameStarts.size - 1 
            outputData['frame'] = {}
            outputData['frame']['reset']            = np.zeros(numFrames, 'bool') 
            outputData['frame']['timeStampStart']   = np.zeros(numFrames, 'uint32') 
            outputData['frame']['timeStampEnd']     = np.zeros(numFrames, 'uint32')
            outputData['frame']['samples']          = np.empty(numFrames, 'object') 
            outputData['frame']['xLength']          = np.zeros(numFrames, 'uint16') 
            outputData['frame']['yLength']          = np.zeros(numFrames, 'uint16') 
            outputData['frame']['xPosition']        = np.zeros(numFrames, 'uint16') 
            outputData['frame']['yPosition']        = np.zeros(numFrames, 'uint16') 
            
            for frameIndex in range(0, numFrames) :
                if frameIndex % 10 == 9:
                    print('Processing frame ', frameIndex + 1, ' of ', numFrames)
                # All within a frame should be either reset or signal. I could
                # implement a check here to see that that's true, but I haven't
                # done so; rather I just take the first value
                outputData['frame']['reset'][frameIndex] \
                    = not frameSignal[frameStarts[frameIndex]]  
                
                 # In aedat 2 format we don't have the four timestamps of the aedat 3 format.
                 # We expect all the timestamps within a frame to be the same;
                 # nevertheless, search for the lowest and highest.
                outputData['frame']['timeStampStart'][frameIndex] \
                    = min(frameTs[frameStarts[frameIndex] : frameStarts[frameIndex + 1]])  
                outputData['frame']['timeStampEnd'][frameIndex] \
                    = max(frameTs[frameStarts[frameIndex] : frameStarts[frameIndex + 1]])  
    
                tempXPosition = min(frameX[frameStarts[frameIndex] : frameStarts[frameIndex + 1]]) 
                outputData['frame']['xPosition'][frameIndex] = tempXPosition 
                tempYPosition = min(frameY[frameStarts[frameIndex] : frameStarts[frameIndex + 1]]) 
                outputData['frame']['yPosition'][frameIndex] = tempYPosition 
                outputData['frame']['xLength'][frameIndex] \
                    = max(frameX[frameStarts[frameIndex] : frameStarts[frameIndex + 1]]) \
                        - outputData['frame']['xPosition'][frameIndex] + 1 
                outputData['frame']['yLength'][frameIndex] \
                    = max(frameY[frameStarts[frameIndex] : frameStarts[frameIndex + 1]]) \
                        - outputData['frame']['yPosition'][frameIndex] + 1 
                # If we worked out which way the data is ramping in each
                # direction, and if we could exclude data loss, then we could
                # do some nice clean matrix transformations; but I'm just going
                # to iterate through the samples, putting them in the right
                # place in the array according to their address
                
                 # first create a temporary array - there is no concept of
                 # colour channels in aedat2
                
                # IN MATLAB IMPLEMENTATION, THIS FOLLOWING LOOP IS REPLACED BY ACCUMARRAY FUNCTION - Haven't figured out a good python equivalent yet                
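                # A possible NumPy stand-in for accumarray (an untested sketch, not part of the
                # original): if each (y, x) address occurs at most once per frame, a fancy-indexed
                # assignment into the tempSamples array allocated just below fills it without the
                # inner Python loop:
                #   ys = frameY[frameStarts[frameIndex] : frameStarts[frameIndex + 1]] - tempYPosition
                #   xs = frameX[frameStarts[frameIndex] : frameStarts[frameIndex + 1]] - tempXPosition
                #   tempSamples[ys, xs] = frameSample[frameStarts[frameIndex] : frameStarts[frameIndex + 1]]
                # If duplicate addresses can occur, np.add.at(tempSamples, (ys, xs), ...) would mimic
                # accumarray's summing behaviour instead.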
                tempSamples = np.zeros((outputData['frame']['yLength'][frameIndex], \
                                    outputData['frame']['xLength'][frameIndex]), dtype='uint16') 
                for sampleIndex in range(frameStarts[frameIndex], frameStarts[frameIndex + 1]):
                    tempSamples[frameY[sampleIndex] \
                                    - outputData['frame']['yPosition'][frameIndex], \
                                frameX[sampleIndex] \
                                    - outputData['frame']['xPosition'][frameIndex]] \
                        = frameSample[sampleIndex] 

                outputData['frame']['samples'][frameIndex] = tempSamples 
    
            if (not ('subtractResetRead' in importParams) or importParams['subtractResetRead']) \
                    and 'reset' in outputData['frame']:
                # Make a second pass through the frames, subtracting reset
                # reads from signal reads
                frameCount = 0
                for frameIndex in range(0, numFrames):
                    if frameIndex % 10 == 9:
                        print('Performing subtraction on frame ', frameIndex + 1, ' of ', numFrames)
                    if outputData['frame']['reset'][frameIndex]: 
                        resetFrame = outputData['frame']['samples'][frameIndex] 
                        resetXPosition = outputData['frame']['xPosition'][frameIndex] 
                        resetYPosition = outputData['frame']['yPosition'][frameIndex] 
                        resetXLength = outputData['frame']['xLength'][frameIndex] 
                        resetYLength = outputData['frame']['yLength'][frameIndex]                     
                    else: 
                         # If a resetFrame has not yet been found, 
                         # push through the signal frame as is
                        if not 'resetFrame' in locals():
                            outputData['frame']['samples'][frameCount] \
                                = outputData['frame']['samples'][frameIndex] 
                        else:
                             # If the resetFrame and signalFrame are not the same size,    
                             # don't attempt subtraction 
                             # (there is probably a cleaner solution than this - could be improved)
                            if resetXPosition != outputData['frame']['xPosition'][frameIndex] \
                                or resetYPosition != outputData['frame']['yPosition'][frameIndex] \
                                or resetXLength != outputData['frame']['xLength'][frameIndex] \
                                or resetYLength != outputData['frame']['yLength'][frameIndex]:
                                outputData['frame']['samples'][frameCount] \
                                    = outputData['frame']['samples'][frameIndex] 
                            else:
                                 # Do the subtraction
                                outputData['frame']['samples'][frameCount] \
                                    = resetFrame - outputData['frame']['samples'][frameIndex] 
                                # This operation was on unsigned integers, set negatives to zero
                                outputData['frame']['samples'][frameCount][outputData['frame']['samples'][frameCount] > 32767] = 0
                             # Copy over the rest of the info
                            outputData['frame']['xPosition'][frameCount] \
                                = outputData['frame']['xPosition'][frameIndex] 
                            outputData['frame']['yPosition'][frameCount] \
                                = outputData['frame']['yPosition'][frameIndex] 
                            outputData['frame']['xLength'][frameCount] \
                                = outputData['frame']['xLength'][frameIndex] 
                            outputData['frame']['yLength'][frameCount] \
                                = outputData['frame']['yLength'][frameIndex] 
                            outputData['frame']['timeStampStart'][frameCount] \
                                = outputData['frame']['timeStampStart'][frameIndex]  
                            outputData['frame']['timeStampEnd'][frameCount] \
                                = outputData['frame']['timeStampEnd'][frameIndex]                              
                            frameCount = frameCount + 1
                 # Clip the arrays
                outputData['frame']['xPosition'] \
                    = outputData['frame']['xPosition'][0 : frameCount] 
                outputData['frame']['yPosition'] \
                    = outputData['frame']['yPosition'][0 : frameCount] 
                outputData['frame']['xLength'] \
                    = outputData['frame']['xLength'][0 : frameCount] 
                outputData['frame']['yLength'] \
                    = outputData['frame']['yLength'][0 : frameCount] 
                outputData['frame']['timeStampStart'] \
                    = outputData['frame']['timeStampStart'][0 : frameCount] 
                outputData['frame']['timeStampEnd'] \
                    = outputData['frame']['timeStampEnd'][0 : frameCount] 
                outputData['frame']['samples'] \
                    = outputData['frame']['samples'][0 : frameCount]
                del outputData['frame']['reset']   # reset is no longer needed
        del frameLogical
    
    
        # IMU events
        # These come in blocks of 7, for the 7 different values produced in
        # a single sample; the following code recomposes these
        # 7 words are sent in series, these being 3 axes for accel, temperature, and 3 axes for gyro

        imuLogical = np.logical_and(apsOrImuLogical, ImuOrPolarityLogical)
        if ('dataTypes' not in importParams or 'imu6' in importParams['dataTypes']) \
                and any(imuLogical):
            print('Processing IMU6 events ...')
            outputData['imu6'] = {}
            outputData['imu6']['timeStamp'] = allTs[imuLogical]

            if np.mod(np.count_nonzero(imuLogical), 7) > 0: 
                print('The number of IMU samples is not divisible by 7, so IMU samples are not interpretable')
            else:
                outputData['imu6']['timeStamp'] = allTs[imuLogical]
                outputData['imu6']['timeStamp'] \
                    = outputData['imu6']['timeStamp'][0 : : 7]
    
            # Conversion factors
            # Actually these scales depend on the full scale value
            # with which the IMU is configured.
            # Here I assume jaer defaults: 1000 deg/s for gyro and 8 g for accel
            # Given 16 bit samples, this results in the following:
            accelScale = 1.0/8192 # This gives acceleration in g
            gyroScale = 1.0/65.535 # This gives angular velocity in deg/s
            temperatureScale = 1.0/340
            temperatureOffset=35.0
    
            imuDataMask = int('0FFFF000', 16)
            imuDataShiftBits = 12
            rawData = np.right_shift(np.bitwise_and(allAddr[imuLogical], imuDataMask), imuDataShiftBits)
            # This is a uint32 which contains an int16. Need to convert to int16 before converting to float.             
            rawData = rawData.astype('int16')
            rawData = rawData.astype('float32')
                        
            outputData['imu6']['accelX']        = rawData[0 : : 7] * accelScale    
            outputData['imu6']['accelY']        = rawData[1 : : 7] * accelScale    
            outputData['imu6']['accelZ']        = rawData[2 : : 7] * accelScale    
            outputData['imu6']['temperature']   = rawData[3 : : 7] * temperatureScale + temperatureOffset   
            outputData['imu6']['gyroX']         = rawData[4 : : 7] * gyroScale  
            outputData['imu6']['gyroY']         = rawData[5 : : 7] * gyroScale
            outputData['imu6']['gyroZ']         = rawData[6 : : 7] * gyroScale
        del imuLogical

    # If you want to do chip-specific address shifts or subtractions,
    # this would be the place to do it.

    # Calculate numEvents fields; also find the first and last timestamps
    info['firstTimeStamp'] = np.inf
    info['lastTimeStamp'] = 0

    aedat['info'] = info
    aedat['data'] = outputData

    # Find first and last time stamps        
    aedat = FindFirstAndLastTimeStamps(aedat)
    
    # Add NumEvents field for each data type
    aedat = NumEventsByType(aedat)
       
    return aedat
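A minimal usage sketch for the importer above, assuming the caller has already read the file header elsewhere and filled in info['fileFormat'], info['source'] and info['beginningOfDataPointer'] (those names are taken from the function body; the file name, offset and data types below are illustrative):

fileHandle = open('recording.aedat', 'rb')   # illustrative file name
aedat = {
    'info': {
        'fileFormat': 2,                     # anything other than 1 means 8 bytes per event
        'source': 'Davis240',                # anything but 'Das1'/'Dvs128' takes the DAVIS branch
        'beginningOfDataPointer': 0,         # byte offset just past the header (assumed known)
    },
    'importParams': {
        'fileHandle': fileHandle,
        'dataTypes': ['polarity', 'frame'],  # optional restriction of decoded event types
    },
}
aedat = ImportAedatDataVersion1or2(aedat)
print(aedat['data'].keys())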
Beispiel #55
0
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default="../data/bert",
        type=str,
        required=False,
        help=
        "The input data dir. Should contain the _p.tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default='bert-base-uncased',
        type=str,
        required=False,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese."
    )
    parser.add_argument("--task_name",
                        default='MRPC',
                        type=str,
                        required=False,
                        help="The name of the task to train.")
    parser.add_argument(
        "--output_dir",
        default="../data/bert/output",
        type=str,
        required=False,
        help="The output directory where the model checkpoints will be written."
    )

    # Other parameters
    parser.add_argument(
        "--max_seq_length",
        default=192,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        default=False,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict",
                        default=True,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        default=True,
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=128,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=128,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        default=False,
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--optimize_on_cpu',
        default=False,
        action='store_true',
        help=
        "Whether to perform optimization and keep the optimizer averages on CPU"
    )
    parser.add_argument(
        '--fp16',
        default=False,
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=128,
        help=
        'Loss scaling, positive power of 2 values can improve fp16 convergence.'
    )
    parser.add_argument('--load_model_path',
                        default='../data/models/base-uncased-192-2of9-ep1.pt',
                        help='Load model for prediction')

    args = parser.parse_args()

    processors = {"mrpc": MrpcProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info(
                "16-bits training currently not supported in distributed training"
            )
            args.fp16 = False  # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu,
                bool(args.local_rank != -1))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = int(args.train_batch_size /
                                args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval and not args.do_predict:
        raise ValueError(
            "At least one of `do_train` or `do_eval` or `do_predict` must be True."
        )

    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
    #     raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps * args.num_train_epochs)

    # Prepare model
    model = BertForNextSentencePrediction.from_pretrained(args.bert_model)
    # model = BertForNextSentencePrediction.from_pretrained(args.bert_model,
    #                                                       cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank))

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    print('model loading')
    model.load_state_dict(torch.load(args.load_model_path))
    print('model loaded')

    # Prepare optimizer
    if args.fp16:
        param_optimizer = [
            (n, param.clone().detach().to('cpu').float().requires_grad_())
            for n, param in model.named_parameters()
        ]
    elif args.optimize_on_cpu:
        param_optimizer = [(n,
                            param.clone().detach().to('cpu').requires_grad_())
                           for n, param in model.named_parameters()]
    else:
        param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay_rate':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay_rate':
        0.0
    }]
    t_total = num_train_steps
    if args.local_rank != -1:
        t_total = t_total // torch.distributed.get_world_size()
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=t_total)

    global_step = 0

    if args.do_predict and (args.local_rank == -1
                            or torch.distributed.get_rank() == 0):

        eval_examples = processor.get_test_examples(args.data_dir)
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        model.eval()
        count = 0
        predictions = [0]
        for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            with torch.no_grad():
                tmp_eval_loss = model(input_ids, segment_ids, input_mask,
                                      label_ids)
                logits = model(input_ids, segment_ids, input_mask)

            logits = logits.detach().cpu().numpy()  #predicted label
            logits = logits[:, 1]
            predictions.extend(logits)
            count += args.eval_batch_size
            print(count / len(eval_examples) * 100)

        data = pd.read_csv('../data/bert/eval1_unlabelled_p.tsv',
                           sep='\t',
                           header=None)
        data[len(data.columns)] = predictions
        data.to_csv('../data/pre-answer.tsv',
                    sep='\t',
                    header=False,
                    index=False)

        # Converting to submission format
        data = pd.read_csv('../data/pre-answer.tsv',
                           sep='\t',
                           names=[
                               'query_id', 'query_text', 'passage_text',
                               'passage_id', 'cs'
                           ])
        uniq, index = np.unique(data['query_id'], return_index=True)
        query_id = uniq[index.argsort()]
        scores = data['cs'].values.reshape(-1, 10)
        print(scores.shape)
        answer = np.column_stack((query_id, scores))
        answer = pd.DataFrame(answer)
        answer.iloc[:, 0] = answer.iloc[:, 0].astype('int')
        answer.to_csv('../data/answer.tsv', sep='\t', header=None, index=False)
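The reshape(-1, 10) above assumes exactly ten candidate passages per query. A hedged alternative sketch groups by query_id instead, so no assumption about the number of passages per query is needed (column names follow the names= list above; the file paths are the same illustrative ones):

import pandas as pd

data = pd.read_csv('../data/pre-answer.tsv', sep='\t',
                   names=['query_id', 'query_text', 'passage_text', 'passage_id', 'cs'])
# Keep the original query order and collect each query's scores, however many there are.
grouped = data.groupby('query_id', sort=False)['cs'].apply(list)
answer = pd.DataFrame(grouped.tolist(), index=grouped.index).reset_index()
answer.to_csv('../data/answer.tsv', sep='\t', header=False, index=False)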
Beispiel #56
0
#==============================================================================
# Set up common services and job object.
# This should appear in ALL derivation job options
#==============================================================================
# Add translator from EVGEN input to xAOD-like truth here
from DerivationFrameworkCore.DerivationFrameworkMaster import *
from RecExConfig.ObjKeyStore import objKeyStore
from xAODTruthCnv.xAODTruthCnvConf import xAODMaker__xAODTruthCnvAlg
from RecExConfig.InputFilePeeker import inputFileSummary

#ensure EventInfoCnvAlg is scheduled in the main algsequence, if not already, and is needed
from RecExConfig.InputFilePeeker import inputFileSummary
if ("EventInfo#McEventInfo" not in inputFileSummary['eventdata_itemsList']) and not any(isinstance(x,CfgMgr.xAODMaker__EventInfoCnvAlg) for x in DerivationFrameworkJob):
    DerivationFrameworkJob += CfgMgr.xAODMaker__EventInfoCnvAlg()

# Decide what kind of input HepMC container we are dealing with
if ("McEventCollection#GEN_EVENT" in inputFileSummary['eventdata_itemsList']):
    DerivationFrameworkJob += xAODMaker__xAODTruthCnvAlg("GEN_EVNT2xAOD",AODContainerName="GEN_EVENT")
elif ("McEventCollection#TruthEvent" in inputFileSummary['eventdata_itemsList']):
    DerivationFrameworkJob += xAODMaker__xAODTruthCnvAlg("GEN_EVNT2xAOD",AODContainerName="TruthEvent")

#==============================================================================
# Create the derivation kernel algorithm
#==============================================================================
from DerivationFrameworkCore.DerivationFrameworkCoreConf import DerivationFramework__DerivationKernel
DerivationFrameworkJob += CfgMgr.DerivationFramework__DerivationKernel("TRUTH0Kernel")

#==============================================================================
# Set up stream
#==============================================================================
streamName = derivationFlags.WriteDAOD_TRUTH0Stream.StreamName
Beispiel #57
0
def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(
            args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt")):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path):
        # set global_step to global_step of last saved checkpoint from model path
        try:
            global_step = int(
                args.model_name_or_path.split("-")[-1].split("/")[0])
        except ValueError:
            global_step = 0
        epochs_trained = global_step // (len(train_dataloader) //
                                         args.gradient_accumulation_steps)
        steps_trained_in_current_epoch = global_step % (
            len(train_dataloader) // args.gradient_accumulation_steps)

        logger.info(
            "  Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info("  Continuing training from epoch %d", epochs_trained)
        logger.info("  Continuing training from global step %d", global_step)
        logger.info("  Will skip the first %d steps in the first epoch",
                    steps_trained_in_current_epoch)

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(epochs_trained,
                            int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3]
            }
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None
                )  # XLM and RoBERTa don't use segment_ids

            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always tuple in pytorch-transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if (
                            args.local_rank == -1
                            and args.evaluate_during_training
                    ):  # Only evaluate when single GPU otherwise metrics may not average well
                        results, _ = evaluate(args,
                                              model,
                                              tokenizer,
                                              labels,
                                              pad_token_label_id,
                                              mode="dev")
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value,
                                                 global_step)
                    tb_writer.add_scalar("lr",
                                         scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
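train() above reads a fair number of fields from args. A hedged sketch of a minimal namespace covering the attributes the function (and its set_seed call) appears to touch; the values are illustrative defaults, not the project's configuration:

from types import SimpleNamespace
import torch

args = SimpleNamespace(
    local_rank=-1, n_gpu=1, device=torch.device("cpu"),
    per_gpu_train_batch_size=8, gradient_accumulation_steps=1,
    max_steps=-1, num_train_epochs=3,
    learning_rate=5e-5, weight_decay=0.01, adam_epsilon=1e-8, warmup_steps=0,
    max_grad_norm=1.0, fp16=False, fp16_opt_level="O1",
    model_name_or_path="bert-base-cased", model_type="bert",
    logging_steps=50, save_steps=500, output_dir="./out",
    evaluate_during_training=False, seed=42,
)
# global_step, avg_loss = train(args, train_dataset, model, tokenizer, labels, pad_token_label_id)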
Beispiel #58
0
def process_filters(filters, article, only_actions=None):
    skipped, read, liked = False, None, False
    filters = filters or []
    if only_actions is None:
        only_actions = set(FiltersAction)
    for filter_ in filters:
        match = False
        try:
            pattern = filter_.get("pattern", "")
            filter_type = FiltersType(filter_.get("type"))
            filter_action = FiltersAction(filter_.get("action"))
            filter_trigger = FiltersTrigger(filter_.get("action on"))
            if filter_type is not FiltersType.REGEX:
                pattern = pattern.lower()
        except ValueError:
            continue
        if filter_action not in only_actions:
            logger.debug("ignoring filter %r" % filter_)
            continue
        if (
            filter_type
            in {FiltersType.REGEX, FiltersType.MATCH, FiltersType.EXACT_MATCH}
            and "title" not in article
        ):
            continue
        if (
            filter_type in {FiltersType.TAG_MATCH, FiltersType.TAG_CONTAINS}
            and "tags" not in article
        ):
            continue
        title = article.get("title", "").lower()
        tags = [tag.lower() for tag in article.get("tags", [])]
        if filter_type is FiltersType.REGEX:
            match = re.match(pattern, title)
        elif filter_type is FiltersType.MATCH:
            match = pattern in title
        elif filter_type is FiltersType.EXACT_MATCH:
            match = pattern == title
        elif filter_type is FiltersType.TAG_MATCH:
            match = pattern in tags
        elif filter_type is FiltersType.TAG_CONTAINS:
            match = any(pattern in tag for tag in tags)
        take_action = (
            match
            and filter_trigger is FiltersTrigger.MATCH
            or not match
            and filter_trigger is FiltersTrigger.NO_MATCH
        )

        if not take_action:
            continue

        if filter_action is FiltersAction.READ:
            read = True
        elif filter_action is FiltersAction.LIKED:
            liked = True
        elif filter_action is FiltersAction.SKIP:
            skipped = True

    if skipped or read or liked:
        logger.info(
            "%r applied on %r",
            filter_action.value,
            article.get("link") or article.get("title"),
        )
    return skipped, read, liked
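A hypothetical call to process_filters, for illustration only. The string values for "type", "action" and "action on" depend on how FiltersType, FiltersAction and FiltersTrigger are declared, which is not shown here, so the literals below are assumptions:

# Hypothetical article/filter pair; the enum string values are assumed, not taken from the source.
article = {"title": "Weekly release notes", "tags": ["release"]}
filters = [{
    "pattern": "release notes",
    "type": "simple match",    # assumed FiltersType value
    "action": "mark as read",  # assumed FiltersAction value
    "action on": "match",      # assumed FiltersTrigger value
}]
skipped, read, liked = process_filters(filters, article)
# read would be True if the assumed literals resolve to MATCH / READ / MATCH respectively.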
Beispiel #59
0
def parse(fn):
    with open(fn) as f:
        data = f.read()

    sents = []
    tmp = []
    for x in data.split("\n"):
        x = x.strip()
        if not x:
            sents.append("\n".join(tmp))
            tmp = []
        else:
            tmp.append(x)

    srl = []
    for i, s in enumerate(sents):
        t_srl = {}
        if not s:
            srl.append([])
            continue
        trips = [x.split()[0:5] for x in s.split("\n")]
        preds = set([" ".join(x[0:2]) for x in trips if "ARG" in x[2]])
        for p in preds:
            ps = p.split(" ")
            args = [
                x for x in trips
                if x[0] == ps[0] and x[1] == ps[1] and "ARG" in x[2]
            ]
            for a in args:
                appnd = tuple(a[2:4])
                if p not in t_srl:
                    t_srl[p] = []
                t_srl[p].append(appnd)
            rargs = [x for x in trips if x[3] == ps[0] and x[4] == ps[1]]
            for a in rargs:
                appnd = (a[2], a[0])
                if p not in t_srl:
                    t_srl[p] = []
                t_srl[p].append(appnd)

        # deal w/ copulas
        cops = [x[0:2] for x in trips if x[0] in copulas]
        for c in cops:
            deps = [x[3] + " " + x[4] for x in trips if x[0:2] == c]
            if any([x in t_srl for x in deps]):
                continue
            else:
                l = sorted([x for x in trips if x[0:2] == c],
                           key=lambda x: x[4])
                arg = 0
                l_list = []
                for x in l:
                    l_list.append(("ARG" + str(arg), x[3]))
                    arg += 1
                t_srl[" ".join(c)] = l_list  #[x[2:4] for x in l]

        #sort by pred order
        ordered = []
        for k in t_srl.keys():
            p, num = k.split(" ")
            ordered.append((int(num), (p, t_srl[k])))
        ordered.sort()
        ordered = [x[1] for x in ordered]
        srl.append(ordered)

    return srl
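A hedged sketch of the whitespace-separated input parse() appears to expect: blank-line-separated sentences whose rows carry at least five fields (predicate token, predicate index, role label, argument token, argument index). The tokens below are illustrative, and the module-level copulas collection is assumed to be defined elsewhere in the original file:

# Contents of an illustrative 'example.srl' file (one sentence, ending with a newline):
#   ate 2 ARG0 cat 1
#   ate 2 ARG1 fish 3
srl = parse('example.srl')
# -> [[('ate', [('ARG0', 'cat'), ('ARG1', 'fish')])]]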
Beispiel #60
0
 def game( self, **kwargs ):
     name = kwargs[ 'name' ]
     color = kwargs[ 'color' ]
     game_doc = self.game_collection.find_one( { 'name' : name } )
     if not game_doc:
         return self.MakeErrorPage( 'Failed to find game: %s', name )
     go_game = GoGame()
     go_game.Deserialize( game_doc[ 'data' ] )
     whose_turn = 'white' if go_game.whose_turn == GoBoard.WHITE else 'black'
     color_id = GoBoard.WHITE if color == 'white' else GoBoard.BLACK
     move = { 'row' : -1, 'col' : -1 }
     if 'most_recent_move' in game_doc:
         move = game_doc[ 'most_recent_move' ]
     board = go_game.CurrentBoard()
     group_list = {
         GoBoard.WHITE : board.AnalyzeGroups( GoBoard.WHITE ),
         GoBoard.BLACK : board.AnalyzeGroups( GoBoard.BLACK )
     }
     html_board_table = '<table cellspacing="0" cellpadding="0">\n'
     for i in range( board.size ):
         html_board_table += '<tr>'
         for j in range( board.size ):
             html_board_table += '<td class="cell" style="height: 64px; width: 64px;">\n'
             board_back_image = self.DetermineBoardImage( i, j, board.size )
             state = board.GetState( ( i, j ) )
             if state == GoBoard.EMPTY:
                 html_board_table += '<img src="images/%s" onclick="OnPlaceStoneClicked( \'%s\', \'%s\', %d, %d )">\n' % ( board_back_image, name, color, i, j )
                 if any( [ board.GetState( adj_location ) != GoBoard.EMPTY for adj_location in board.AdjacentLocations( ( i, j ) ) ] ):
                     html_board_table += '<img class="lib_img" id="liberty_%d_%d" src="images/liberty.png" style="visibility:hidden"/>\n' % ( i, j )
             else:
                 if state == GoBoard.WHITE:
                     board_fore_image = 'white_stone.png'
                 elif state == GoBoard.BLACK:
                     board_fore_image = 'black_stone.png'
                 hover_calls = self.FormulateLibertyHoverJSCalls( group_list[ state ], i, j )
                 click_calls = 'onclick="OnGiveUpStoneClicked( \'%s\', \'%s\', %d, %d )"' % ( name, color, i, j ) if state == color_id else ''
                 html_board_table += '<img class="back_img" src="images/%s"/>\n' % board_back_image
                 html_board_table += '<img class="fore_img" src="images/%s" %s %s/>\n' % ( board_fore_image, hover_calls, click_calls )
                 if move[ 'row' ] == i and move[ 'col' ] == j:
                     html_board_table += '<img class="high_img" src="images/highlight.png" %s/>\n' % hover_calls
             html_board_table += '</td>\n'
         html_board_table += '</tr>\n'
     html_board_table += '</table>\n'
     html_message = '<p>It is %s\'s turn.  You are %s.</p>' % ( whose_turn, color )
     html_white_info = self.GenerateInfoForColor( go_game, 'white' )
     html_black_info = self.GenerateInfoForColor( go_game, 'black' )
     scores = go_game.CalculateScores()
     html_score_info = '<center><table border="2">\n'
     html_score_info += '<tr><th></th><th>white</th><th>black</th></tr>\n'
     html_score_info += '<tr><td>score</td><td>%d</td><td>%d</td></tr>\n' % ( scores[ GoBoard.WHITE ][ 'score' ], scores[ GoBoard.BLACK ][ 'score' ] )
     html_score_info += '<tr><td>captures</td><td>%d</td><td>%d</td></tr>\n' % ( scores[ GoBoard.WHITE ][ 'captures' ], scores[ GoBoard.BLACK ][ 'captures' ] )
     html_score_info += '<tr><td>territory</td><td>%d</td><td>%d</td></tr>\n' % ( scores[ GoBoard.WHITE ][ 'territory' ], scores[ GoBoard.BLACK ][ 'territory' ] )
     html_score_info += '</table></center>\n'
     html_pass_button = '<p><center><button type="button" onclick="OnPlaceStoneClicked( \'%s\', \'%s\', -1, -1 )">forfeit turn</button>' % ( name, color )
     return '''
     <html lang="en-US">
         <head>
             <title>Go Game: %s</title>
             <link rel="stylesheet" href="css/go.css">
             <script src="https://code.jquery.com/jquery.js"></script>
             <script src="scripts/go.js"></script>
         </head>
         <body onload="OnPageLoad(%s, '%s', '%s')">
             <div>
                 <p><center>%s</center></p>
                 <p><center>%s</center></p>
                 <!--<center><input type="checkbox" id="respond">Have computer respond.</input></center>-->
                 <center>%s</center>
                 %s
                 <p><center>Click an empty board intersection to place a stone.  Click on your own stone to give it up as a prisoner (at end of game.)</center></p>
             </div>
             <div>
                 %s
                 %s
             </div>
         </body>
     </html>
     ''' % ( name, ('true' if whose_turn == color else 'false'), color, name, html_message, html_score_info, html_board_table, html_pass_button, html_white_info, html_black_info )