def main():
    args = args_parse()
    out1 = open("%s.TE.fa"%str(args.fasta).split(".")[0],"w")
    out2 = open("%s.CON.fa"%str(args.fasta).split(".")[0],"w")
    seqslen = get_len(args.fasta)
    seqs = get_seqs(args.fasta)
    TE = get_TE_ids(args.TE)
    pfam_ids = get_pfam(args.infile)
    TE_ids = []

    for line in open(args.infile,"r"):
        if "#" not in line and len(line.strip().split()) == 15 :
            ids = line.strip().split()[0]
            start = int(line.strip().split()[1])
            end = int(line.strip().split()[2])
            hmm = line.strip().split()[5]

            if hmm.split(".")[0] in TE:
                per = (float(end)-float(start)+float(1))/float(seqslen.get(ids))
                if per >= float(0.3):
                    TE_ids.append(ids)
                    out1.write(">%s\t%f\n%s\n"%(ids,per,seqs.get(ids)))
    set1 = set(TE_ids)
    set2 = set(seqs.keys())
    for retain_ids in set2 - set1:
        try:
            out2.write(">%s\t%s\n%s\n"%(retain_ids,pfam_ids[retain_ids],seqs.get(retain_ids)))
        except KeyError:
            out2.write(">%s\tNon-domains\n%s\n"%(retain_ids,seqs.get(retain_ids)))
Example n. 2
	def route(self, minz):
		try:
			self.paths = []
			self.sub_terminal_collision_lines()
			visited = set()
			for index in xrange(1, len(self.terminals)):
				visited |= set([(int(self.terminals[index - 1][2][0]+0.5), int(self.terminals[index - 1][2][1]+0.5), z) for z in xrange(self.pcb.depth)])
				ends = [(int(self.terminals[index][2][0]+0.5), int(self.terminals[index][2][1]+0.5), z) for z in xrange(self.pcb.depth)]
				self.pcb.mark_distances(self.pcb.routing_flood_vectors, self.radius, self.via, self.gap, visited, ends)
				ends = [(self.pcb.get_node(node), node) for node in ends]
				ends.sort()
				_, end = ends[0]
				path = [end]
				while path[-1] not in visited:
					nearer_nodes = self.pcb.all_not_shorting(self.pcb.all_nearer_sorted, \
								(self.pcb.routing_path_vectors, path[-1], end, self.pcb.dfunc), path[-1], self.radius, self.via, self.gap)
					next_node = next(nearer_nodes)
					if minz:
						for node in nearer_nodes:
							if node[2] == path[-1][2]:
								next_node = node
								break
					path.append(next_node)
				visited |= set(path)
				self.paths.append(path)
				self.pcb.unmark_distances()
			self.paths = self.optimise_paths(self.paths)
			self.add_paths_collision_lines()
			self.add_terminal_collision_lines()
			return True
		except StopIteration:
			self.pcb.unmark_distances()
			self.remove()
			return False
Example n. 3
    def _get_router_ids_for_agent(self, context, agent_db, router_ids):
        result_set = set(super(L3_DVRsch_db_mixin,
                            self)._get_router_ids_for_agent(
            context, agent_db, router_ids))
        router_ids = set(router_ids or [])
        if router_ids and result_set == router_ids:
            # no need for extra dvr checks if requested routers are
            # explicitly scheduled to the agent
            return list(result_set)

        # dvr routers are not explicitly scheduled to agents on hosts with
        # dvr serviceable ports, so need special handling
        if (self._get_agent_mode(agent_db) in
            [n_const.L3_AGENT_MODE_DVR,
             n_const.L3_AGENT_MODE_DVR_NO_EXTERNAL,
             n_const.L3_AGENT_MODE_DVR_SNAT]):
            if not router_ids:
                result_set |= set(self._get_dvr_router_ids_for_host(
                    context, agent_db['host']))
            else:
                for router_id in (router_ids - result_set):
                    subnet_ids = self.get_subnet_ids_on_router(
                        context, router_id)
                    if (subnet_ids and
                            self._check_dvr_serviceable_ports_on_host(
                                    context, agent_db['host'],
                                    list(subnet_ids))):
                        result_set.add(router_id)

        return list(result_set)
    def neargroups(self, blocknames):
        """Given a list or set of block names, finds groups of 'near' blocks.  Blocks are assigned the same group
        if they are neighbours, or share a neighbour."""
        blocknames = list(set(blocknames))
        groups = []
        for blk in blocknames:
            groups.append(set([blk]))
        from copy import copy

        done = False
        while not done:
            done = True
            for i, g in enumerate(groups):
                ng = copy(g)
                for blk in g:
                    ng = ng | self.block[blk].neighbour_name
                if i < len(groups) - 1:
                    for g2 in groups[i + 1 :]:
                        ng2 = copy(g2)
                        for blk in g2:
                            ng2 = ng2 | self.block[blk].neighbour_name
                        if ng & ng2:
                            g.update(g2)
                            groups.remove(g2)
                            done = False
                            break
                    if not done:
                        break
        return groups
Example n. 5
def testStandingsBeforeMatches():
    """
    Test to ensure players are properly represented in standings prior
    to any matches being reported.
    """
    deleteMatches()
    deletePlayers()
    registerPlayer("Melpomene Murray")
    registerPlayer("Randy Schwartz")
    standings = playerStandings()
    if len(standings) < 2:
        raise ValueError("Players should appear in playerStandings even before "
                         "they have played any matches.")
    elif len(standings) > 2:
        raise ValueError("Only registered players should appear in standings.")
    if len(standings[0]) != 4:
        raise ValueError("Each playerStandings row should have four columns.")
    [(id1, name1, wins1, matches1), (id2, name2, wins2, matches2)] = standings
    if matches1 != 0 or matches2 != 0 or wins1 != 0 or wins2 != 0:
        raise ValueError(
            "Newly registered players should have no matches or wins.")
    if set([name1, name2]) != set(["Melpomene Murray", "Randy Schwartz"]):
        raise ValueError("Registered players' names should appear in standings, "
                         "even if they have no matches played.")
    print "6. Newly registered players appear in the standings with no matches."
Example n. 6
    def __init__(self, data_dir_name):
        '''
        Read meta-data for test data
        and set table to load based on a given order or on schema files in
        input data
        @param data_dir_name: path to directory containing test data and
                              configuration files
        '''
        self.log = logging.getLogger(__name__)
        self.dataDir = data_dir_name

        _topLevelConfigFile = os.path.join(self.dataDir, "description.yaml")
        with io.open(_topLevelConfigFile, 'r') as f:
            self.update(yaml.load(f))

        self.log.debug("Data configuration : %s" % self)

        fromFileTables = self._tableFromSchemaFile()
        # a specific load order on a restricted number of tables
        # can be specified in yaml
        if not self['tables'].get('load-order'):
            self['tables']['load-order'] = fromFileTables
            self.notLoadedTables = []
        else:
            self.notLoadedTables = list(set(fromFileTables) -
                                        set(self.orderedTables))
        self.log.debug("Tables to load : %s", self.orderedTables)
	def _load(self, src, text_src):
		if isinstance(src, PredictionResult):
			result = src
		elif isinstance(src, str):
			result = PredictionResult()
			result.load(src)
		else:
			raise Exception('"result" should be PredictionResult or string.')
	
		if not result.analyzable():
			raise ValueError('The given result is not analyzable.')
	
		# +++ Need to move to another place.			   
		#if self.model._hashcode != result.model_id:
		#	sys.stderr.write('Warning: model ID is different from that in the predicted result. Do you use a different model to analyze?\n')
	
		if text_src is None:
			self.filepath = result.text_src
		else:
			self.filepath = text_src
		self.extra_svm_files = result.extra_svm_files
		predicted_y = result.predicted_y
		self.acc = result.get_accuracy()
		decvals = result.decvals
		true_y = result.true_y
				   
		self.insts, self.true_labels, self.predict_labels = [], set(), set()
		for idx in range(len(true_y)):
			self.insts += [TextInstance(idx, true_y = true_y[idx], predicted_y = predicted_y[idx], decvals = list(decvals[idx]))]
			self.true_labels.add(true_y[idx])
			self.predict_labels.add(predicted_y[idx])
Example n. 8
    def _init_settings(cls, matrix):
        # Check for errors caused by filling in the settings table incorrectly
        if not cls._check_shape(fact=matrix.shape,
                                req=cls._required_settings_shape):
            raise WrongShapeException(fact=matrix.shape,
                                      req=cls._required_settings_shape,
                                      name="Проверка размерности таблицы с общими настройками",
                                      aud=cls.outer_name)
        if not cls._check_nans(fact=matrix):
            raise NansInMatrixException(name="Проверка наличия отсутствующих значений в общих настройках",
                                        aud=cls.outer_name)
        # A few manipulations are needed to check the table itself, since by default everything arrives as matrices
        settings = pd.DataFrame(matrix[1:], columns=matrix[0])
        settings.columns = cls._standard_settings_column_names
        settings.set_index("key", inplace=True)
        # Check whether all the settings have been entered into the table
        if not cls._check_settings(fact=set(settings.index),
                                   req=cls._required_settings_options):
            raise NotEnoughSettings(fact=set(settings.index),
                                    req=cls._required_settings_options,
                                    name="Проверка вхождения всех необходимых\
переменных по ключу в общих настройках",
                                    aud=cls.outer_name)
        # Check that this is exactly what we expected to receive as input
        if not cls._check_values_condition(fact=settings["code"].to_dict(),
                                           req=cls._required_settings_values_condition):
            raise ValuesConditionException(fact=settings["code"].to_dict(),
                                           req=cls._required_settings_values_condition,
                                           name="Проверка валидности ввода настроек в таблицу с общими настройками",
                                           aud=cls.outer_name)
        cls.settings = settings["code"].to_dict()
Example n. 9
 def __init__(self, deviceRef):
     # Check that we've got a valid IOHIDDevice.
     assert(deviceRef)
     assert(cf.CFGetTypeID(deviceRef) == iokit.IOHIDDeviceGetTypeID())
     _device_lookup[deviceRef.value] = self
     self.deviceRef = deviceRef
     # Set attributes from device properties.
     self.transport = self.get_property("Transport")
     self.vendorID = self.get_property("VendorID")
     self.vendorIDSource = self.get_property("VendorIDSource")
     self.productID = self.get_property("ProductID")
     self.versionNumber = self.get_property("VersionNumber")
     self.manufacturer = self.get_property("Manufacturer")
     self.product = self.get_property("Product")
     self.serialNumber = self.get_property("SerialNumber")  # always returns None; apple bug?
     self.locationID = self.get_property("LocationID")
     self.primaryUsage = self.get_property("PrimaryUsage")
     self.primaryUsagePage = self.get_property("PrimaryUsagePage")
     # Populate self.elements with our device elements.
     self.get_elements()        
     # Set up callback functions.
     self.value_observers = set()
     self.removal_observers = set()
     self.register_removal_callback()
     self.register_input_value_callback()
Example n. 10
def get_adjacency_lists(in_file):
    edges = {}
    verts = {}
    edge_count = 0
    with open(in_file) as f:
        for line in f.readlines():
            vertex = line.split()
            v1 = int(vertex[0])
            for v2_s in vertex[1:]:
                v2 = int(v2_s)
                if v2 > v1:
                    # avoid adding duplicated edges in the loaded graph
                    try:
                        verts[v1].add(edge_count)  # edges in v1
                    except KeyError:
                        verts[v1] = set()
                        verts[v1].add(edge_count)
                    try:
                        verts[v2].add(edge_count)  # edges in v2
                    except KeyError:
                        verts[v2] = set()
                        verts[v2].add(edge_count)

                    edges[edge_count] = [v1, v2]
                    edge_count += 1

    return edges, verts
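A quick illustrative check of get_adjacency_lists() on a triangle graph; the file contents and expected output are written out here by hand, not taken from the source.

import os
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as tmp:
    tmp.write("1 2 3\n2 1 3\n3 1 2\n")

edges, verts = get_adjacency_lists(tmp.name)
print(edges)  # {0: [1, 2], 1: [1, 3], 2: [2, 3]}
print(verts)  # {1: {0, 1}, 2: {0, 2}, 3: {1, 2}}
os.remove(tmp.name)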
Example n. 11
 def get_analysis_analysisID_dataStage01ResequencingAnalysis(self,analysis_id_I):
     '''Query rows that are used from the analysis'''
     try:
         data = self.session.query(data_stage01_resequencing_analysis).filter(
                 data_stage01_resequencing_analysis.analysis_id.like(analysis_id_I),
                 data_stage01_resequencing_analysis.used_.is_(True)).all();
         analysis_id_O = []
         experiment_id_O = []
         lineage_name_O = []
         sample_name_O = []
         analysis_type_O = []
         analysis_O = {};
         if data: 
             for d in data:
                 analysis_id_O.append(d.analysis_id);
                 experiment_id_O.append(d.experiment_id);
                 lineage_name_O.append(d.lineage_name);
                 sample_name_O.append(d.sample_name);
                 analysis_type_O.append(d.analysis_type);
             analysis_id_O = list(set(analysis_id_O))
             experiment_id_O = list(set(experiment_id_O))
             lineage_name_O = list(set(lineage_name_O))
             sample_name_O = list(set(sample_name_O))
             analysis_type_O = list(set(analysis_type_O))
             analysis_O={
                     'analysis_id':analysis_id_O,
                     'experiment_id':experiment_id_O,
                     'lineage_name':lineage_name_O,
                     'sample_name':sample_name_O,
                     'analysis_type':analysis_type_O};
             
         return analysis_O;
     except SQLAlchemyError as e:
         print(e);
Example n. 12
def get_all_group_lines(import_groups):
  if not import_groups:
    return []

  def get_group_lines(group):
    def comparator(x, y):
      # These shenanigans are used to properly order imports for inner classes.
      # So we get ordering like:
      # import com.foo.Bar;
      # import com.foo.Bar.Baz;
      # (this is not lexicographical, so normal sort won't suffice)
      x_m = IMPORT_CLASS_RE.match(x)
      y_m = IMPORT_CLASS_RE.match(y)
      if x_m.group('outer') == y_m.group('outer'):
        return cmp(x_m.group('inners'), y_m.group('inners'))
      else:
        return cmp(x, y)
    lines = sorted(import_groups[group], comparator)
    lines.append('')
    return lines

  all_lines = []
  explicit_groups = ['java', 'javax', 'scala', 'com', 'net', 'org']
  for group in explicit_groups:
    if group in import_groups:
      all_lines += get_group_lines(group)

  # Gather remaining groups.
  remaining_groups = sorted(set(import_groups.keys()) - set(explicit_groups))
  for group in remaining_groups:
    all_lines += get_group_lines(group)
  return all_lines
Example n. 13
    def __init__(self, config):
        self.populate_logger()

        self.config = config

        mozinfo.find_and_update_from_json(config.topobjdir)

        # Python 2.6 doesn't allow unicode keys to be used for keyword
        # arguments. This gross hack works around the problem until we
        # rid ourselves of 2.6.
        self.info = {}
        for k, v in mozinfo.info.items():
            if isinstance(k, unicode):
                k = k.encode('ascii')
            self.info[k] = v

        self._libs = OrderedDefaultDict(list)
        self._binaries = OrderedDict()
        self._linkage = []
        self._static_linking_shared = set()

        # Keep track of external paths (third party build systems), starting
        # from what we run a subconfigure in. We'll eliminate some directories
        # as we traverse them with moz.build (e.g. js/src).
        subconfigures = os.path.join(self.config.topobjdir, 'subconfigures')
        paths = []
        if os.path.exists(subconfigures):
            paths = open(subconfigures).read().splitlines()
        self._external_paths = set(mozpath.normsep(d) for d in paths)
        # Add security/nss manually, since it doesn't have a subconfigure.
        self._external_paths.add('security/nss')
Example n. 14
  def decorated(self, **kwargs):
    """A wrapped test method that treats some arguments in a special way."""
    mode = kwargs.pop("mode", "graph")

    distribution = kwargs.get("distribution", None)
    required_tpu = kwargs.pop("required_tpu", False)
    required_gpus = kwargs.pop("required_gpus", None)

    if distribution:
      assert required_gpus is None, (
          "Do not use `required_gpus` and `distribution` together.")
      assert required_tpu is False, (
          "Do not use `required_tpu` and `distribution` together.")
      required_gpus = distribution.required_gpus
      required_tpu = distribution.required_tpu

    if required_tpu and not TPU_TEST:
      self.skipTest("Test requires a TPU, but it's not available.")
    if not required_tpu and TPU_TEST:
      self.skipTest("Test that doesn't require a TPU.")

    if not required_gpus:
      if GPU_TEST:
        self.skipTest("Test that doesn't require GPUs.")
    elif context.num_gpus() < required_gpus:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available".
          format(required_gpus, context.num_gpus()))

    # At this point, `kwargs` doesn't have `required_gpus` or `required_tpu`
    # that the user might have specified.  `kwargs` still has `mode`, which
    # the test is allowed to accept or ignore.
    requested_arguments = tf_inspect.getfullargspec(test_method).args
    missing_arguments = set(list(kwargs.keys()) + ["self"]).difference(
        set(requested_arguments + ["mode"]))
    if missing_arguments:
      raise ValueError("The test is missing arguments {} .".format(
          missing_arguments))

    kwargs_to_pass = {}
    for arg in requested_arguments:
      if arg == "self":
        kwargs_to_pass[arg] = self
      else:
        kwargs_to_pass[arg] = kwargs[arg]

    if mode == "eager":
      with ops.Graph().as_default(), context.eager_mode():
        if distribution:
          kwargs_to_pass["distribution"] = distribution.strategy
        test_method(**kwargs_to_pass)
    elif mode == "graph":
      with ops.Graph().as_default(), context.graph_mode():
        if distribution:
          kwargs_to_pass["distribution"] = distribution.strategy
        test_method(**kwargs_to_pass)
    else:
      raise ValueError(
          "'mode' has to be either 'eager' or 'graph' and not {}".format(
              mode))
Example n. 15
def times(*combined):
  """Generate a product of N sets of combinations.

  times(combine(a=[1,2]), combine(b=[3,4])) == combine(a=[1,2], b=[3,4])

  Args:
    *combined: N lists of dictionaries that specify combinations.

  Returns:
    a list of dictionaries for each combination.

  Raises:
    ValueError: if some of the inputs have overlapping keys.
  """
  assert combined

  if len(combined) == 1:
    return combined[0]

  first = combined[0]
  rest_combined = times(*combined[1:])

  combined_results = []
  for a in first:
    for b in rest_combined:
      if set(a.keys()).intersection(set(b.keys())):
        raise ValueError("Keys need to not overlap: {} vs {}".format(
            a.keys(), b.keys()))

      combined_results.append(OrderedDict(list(a.items()) + list(b.items())))
  return combined_results
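A small usage sketch for times(); the input lists below stand in for what combine() would produce, so the real helper is not needed to run it.

from collections import OrderedDict

first = [OrderedDict(a=1), OrderedDict(a=2)]
second = [OrderedDict(b=3), OrderedDict(b=4)]
for combo in times(first, second):
    print(dict(combo))
# -> {'a': 1, 'b': 3}, {'a': 1, 'b': 4}, {'a': 2, 'b': 3}, {'a': 2, 'b': 4}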
Example n. 16
    def _get_episode_search_strings(self, ep_obj, add_string=''):

        search_string = {'Episode': []}

        if not ep_obj:
            return []

        if self.show.air_by_date:
            for show_name in set(show_name_helpers.allPossibleShowNames(self.show)):
                ep_string = sanitizeSceneName(show_name) + '.' + \
                            str(ep_obj.airdate).replace('-', '|')
                search_string['Episode'].append(ep_string)
        elif self.show.sports:
            for show_name in set(show_name_helpers.allPossibleShowNames(self.show)):
                ep_string = sanitizeSceneName(show_name) + '.' + \
                            str(ep_obj.airdate).replace('-', '|') + '|' + \
                            ep_obj.airdate.strftime('%b')
                search_string['Episode'].append(ep_string)
        elif self.show.anime:
            for show_name in set(show_name_helpers.allPossibleShowNames(self.show)):
                ep_string = sanitizeSceneName(show_name) + '.' + \
                            "%i" % int(ep_obj.scene_absolute_number)
                search_string['Episode'].append(ep_string)
        else:
            for show_name in set(show_name_helpers.allPossibleShowNames(self.show)):
                ep_string = show_name_helpers.sanitizeSceneName(show_name) + '.' + \
                            sickbeard.config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season,
                                                                  'episodenumber': ep_obj.scene_episode} + ' %s' % add_string

                search_string['Episode'].append(re.sub('\s+', '.', ep_string))

        return [search_string]
Example n. 17
def plot_overtime(data_file):
    data = performance.load_score_dict(data_file)

    avg_sim = []
    std_sim = []

    # Lets compute the average fraction of matching paths for each case
    for index, time_step in enumerate(data):
        if index == 0:
            continue
        prev_step = data[index - 1] 


        sim_list = []

        
        for pair_index, pair in enumerate(time_step):

            curr_chain = set([x[0] for x in pair])
            print curr_chain
            prev_chain = set([x[0] for x in prev_step[pair_index]])

            if len(curr_chain) == 0 or len(prev_chain) == 0:
                continue

            sim = float(len(curr_chain & prev_chain)) / len(curr_chain)
            
            sim_list.append(sim)

        avg_sim.append(np.mean(sim_list)) 
        std_sim.append(np.std(sim_list))

        print "Next Time Step!"

    plotting.overtime_plot(avg_sim, std_sim)  
Example n. 18
def ind_complement(v, ind):
    if isinstance(ind, _INDEXTYPES):
        ind = [ind]
    elif type(ind) is slice:
        ind = range(*ind.indices(len(v)))
    l = len(v)
    return sorted(set(range(l)) - set(i if i >= 0 else l+i for i in ind))
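Illustrative calls for ind_complement(); passing a list or a slice avoids the _INDEXTYPES branch, so this runs without the module's own type constants.

v = ["a", "b", "c", "d", "e"]
print(ind_complement(v, [1, -1]))      # -> [0, 2, 3]  (negative indices wrap around)
print(ind_complement(v, slice(0, 2)))  # -> [2, 3, 4]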
Example n. 19
 def getHoster(self):     
     # If no accounts are available there will be no hosters available
     if not self.account or not self.account.canUse():
         return []
     
     # Get account data
     (user, data) = self.account.selectAccount()
     
     # Get supported hosters list from premiumize.me using the json API v1 (see https://secure.premiumize.me/?show=api)
     answer = getURL("https://api.premiumize.me/pm-api/v1.php?method=hosterlist&params[login]=%s&params[pass]=%s" % (user, data['password']))
     data = json_loads(answer)
     
     
     # If the account is not valid there are no hosters available
     if data['status'] != 200:
         return []
     
     # Extract hosters from json file 
     hosters = set(data['result']['hosterlist'])
 
             
     # Read config to check if certain hosters should not be handled
     configMode = self.getConfig('hosterListMode')
     if configMode in ("listed", "unlisted"):
         configList = set(self.getConfig('hosterList').strip().lower().replace('|',',').replace(';',',').split(','))
         configList.discard(u'')
         if configMode == "listed":
             hosters &= configList
         else:
             hosters -= configList
     
     return list(hosters)      
Example n. 20
def _dict_diff(a, b):
    """A one way dictionary diff.

    a: a dictionary
    b: a dictionary

    Returns: True if the dictionaries are different
    """
    # Only things the master has which the slave lacks matter
    if set(a.keys()) - set(b.keys()):
        LOG.debug('metadata diff -- master has extra keys: %(keys)s',
                  {'keys': ' '.join(set(a.keys()) - set(b.keys()))})
        return True

    for key in a:
        if str(a[key]) != str(b[key]):
            LOG.debug('metadata diff -- value differs for key '
                      '%(key)s: master "%(master_value)s" vs '
                      'slave "%(slave_value)s"',
                      {'key': key,
                       'master_value': a[key],
                       'slave_value': b[key]})
            return True

    return False
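A usage sketch for _dict_diff(); LOG is assumed to be the module's standard logger, so a stand-in is bound here just to make the calls runnable.

import logging
LOG = logging.getLogger(__name__)  # assumed stand-in for the module-level logger

print(_dict_diff({"uuid": "abc", "size": 10}, {"uuid": "abc", "size": "10"}))  # False: values compare equal after str()
print(_dict_diff({"uuid": "abc"}, {}))                                         # True: master has a key the slave lacks
print(_dict_diff({}, {"extra": 1}))                                            # False: extra slave keys are ignored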
Example n. 21
def calculateSparseDictCOO(data_set, data_label_hash, jump=1, valid_flag=False):
	row = []
	col = []
	data = []
	row_valid = []
	col_valid = []
	data_valid = []

	doc_ids = set(sorted(map(lambda row:int(row[0]), data_set)))
	base_ids_list = filter(lambda ids: ids % jump == 0, doc_ids)
	train_ids = base_ids_list
	valid_ids = set()
	if valid_flag:
		valid_index = filter(lambda ids: ids % validation_perc == 0, range(len(base_ids_list)))
		valid_ids = [base_ids_list[i] for i in valid_index]
		base_ids = set(base_ids_list)
		train_ids = sorted(base_ids - set(valid_ids))

	labels = map(lambda trid: int(data_label_hash[trid]), train_ids)
	labels_valid = map(lambda vlid: int(data_label_hash[vlid]), valid_ids)
	for i in range(len(data_set)):
		if int(data_set[i][0]) in train_ids:
			row.append(int(data_set[i][0]))
			col.append(int(data_set[i][1])-1)
			data.append(int(data_set[i][2]))
			# labels.append(int(data_label_hash[int(data_set[i][0])]))
		elif int(data_set[i][0]) in valid_ids:
			row_valid.append(int(data_set[i][0]))
			col_valid.append(int(data_set[i][1])-1)
			data_valid.append(int(data_set[i][2]))
			# labels_valid.append(int(data_label_hash[int(data_set[i][0])]))

	train = translate(row), col, data, labels
	valid = translate(row_valid), col_valid, data_valid, labels_valid
	return train, valid
Example n. 22
def includes_for_type(idl_type):
    idl_type = idl_type.preprocessed_type

    # Composite types
    if idl_type.native_array_element_type:
        return includes_for_type(idl_type.native_array_element_type)

    # Simple types
    base_idl_type = idl_type.base_type
    if base_idl_type in INCLUDES_FOR_TYPE:
        return INCLUDES_FOR_TYPE[base_idl_type]
    if idl_type.is_basic_type:
        return set()
    if idl_type.is_typed_array_type:
        # Typed array factory methods are already provided by DartUtilities.h.
        return set([])
    if base_idl_type.endswith('ConstructorConstructor'):
        # FIXME: rename to NamedConstructor
        # FIXME: replace with a [NamedConstructorAttribute] extended attribute
        # Ending with 'ConstructorConstructor' indicates a named constructor,
        # and these do not have header files, as they are part of the generated
        # bindings for the interface
        return set()
    if base_idl_type.endswith('Constructor'):
        # FIXME: replace with a [ConstructorAttribute] extended attribute
        base_idl_type = idl_type.constructor_type_name
    if base_idl_type not in component_dir:
        return set()
    return set(['gen/sky/bindings/Dart%s.h' % base_idl_type])
Example n. 23
def load_data(name, plotdir, print_out=True):
    "Read data and split into train, test data."
    df = read_data(name)
    train, test = train_test_split(df, test_size=0.3)
#   plot_scatter_matrix(train, plotdir)  # takes a while, not that useful 
    yvars = ['risk', 'Y']
    train_y = train[yvars]
    test_y  = test[yvars]
#   train_r = train['risk']    # for five-way multi-class classification
    train = train.drop(['risk', 'Y'], axis=1)
    test  = test.drop(['risk', 'Y'],  axis=1)
    if print_out:
        print("train test types %s %s %s %s" % (type(train), type(test), type(train_y), type(test_y)))
        print("train test shapes %s %s %s %s" % (train.shape, test.shape, train_y.shape, test_y.shape))
        print("train head\n%s" % (train[:3]))
        print("test head\n%s" % (test[:3]))
        print("train_y set %s, test_y set %s" % (set(train_y['Y']), set(test_y['Y'])))
        print("train_y stats\n%s\ntest_y stats\n%s" % (train_y.describe(), test_y.describe()))

#   drop_col = ['b_sugar_up']
#   print('dropping high std/mean columns', drop_col)
#   train = train.drop(drop_col, axis=1)
#   test  = test.drop(drop_col, axis=1)
#   drop_col = ['age','exer_slope']
#   print('dropping low importance columns', drop_col)
#   train = train.drop(drop_col, axis=1)
#   test  = test.drop(drop_col, axis=1)
    return train, test, train_y, test_y
    def extract_features(self, tweet_message):

        if len(self.bag_of_words) == 0:
            print('Bag-of-Words empty!')
            return None

        tweet_words = [word.lower() for word, tag in tweet_message if word not in stopwords and not word.isdigit()]
        tweet_tags = [tag[:2] for word, tag in tweet_message if word not in stopwords and not word.isdigit()]

        feature_set = {}

        # 1st set of features: bag-of-words
        for word in self.bag_of_words:
            feature_set['has_'+word] = (word in tweet_words)

        # 2nd set of features: the tags present in the message
        for tag in ['NN','VG','CD','JJ','CC','RB']:
            feature_set['has_'+tag] = (tag in tweet_tags)

        # 3rd feature: negation is present?
        negators = set(['not', 'none', 'nobody', 'never', 'nothing', 'lack', 't','n\'t','dont', 'no'])
        if len(negators.intersection(set(tweet_words))) > 0:
            feature_set['has_negator'] = True

        return feature_set
Example n. 25
def copy_apps(hemps_path, testcase_path, apps_name_list):
    #--------------  COPIES ALL APP SOURCE FILES RELATED INTO TESTCASE FILE ----------------
    source_app_path = hemps_path+"/applications/"
    testcase_app_path = testcase_path+"/applications/"
    
    create_ifn_exists(testcase_app_path)
        
    #for each app described into testcase file
    for app_name in apps_name_list:

        source_app_dir = source_app_path + app_name
        target_app_dir = testcase_app_path + app_name
        
        generic_copy(source_app_dir, target_app_dir, [".svn"])
        
    apps_in_testcase = []
    
    #List all directories inside the applications directory
    for tc_app in os.listdir(testcase_app_path):
        if os.path.isdir(testcase_app_path+tc_app):
            apps_in_testcase.append(tc_app)
        
    #Remove apps present in the testcase but no longer listed in the testcase file
    to_remove_apps = list ( set(apps_in_testcase) - set(apps_name_list) )
    
    for to_remove_app in to_remove_apps:
        delete_if_exists(testcase_app_path + to_remove_app)
def test_parameter_grid():
    """Test basic properties of ParameterGrid."""
    params1 = {"foo": [1, 2, 3]}
    grid1 = ParameterGrid(params1)
    assert_true(isinstance(grid1, Iterable))
    assert_true(isinstance(grid1, Sized))
    assert_equal(len(grid1), 3)

    params2 = {"foo": [4, 2],
               "bar": ["ham", "spam", "eggs"]}
    grid2 = ParameterGrid(params2)
    assert_equal(len(grid2), 6)

    # loop to assert we can iterate over the grid multiple times
    for i in xrange(2):
        # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
        assert_equal(points,
                     set(("bar", x, "foo", y)
                         for x, y in product(params2["bar"], params2["foo"])))

    # Special case: empty grid (useful to get default estimator settings)
    empty = ParameterGrid({})
    assert_equal(len(empty), 1)
    assert_equal(list(empty), [{}])

    has_empty = ParameterGrid([{'C': [1, 10]}, {}])
    assert_equal(len(has_empty), 3)
    assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}])
Example n. 27
def dedup_value(body, ctype, action="dedup_value", prop=None):
    '''
    Service that accepts a JSON document and enriches the prop field of that document by:

    a) Removing duplicates
    '''

    if prop:
        try:
            data = json.loads(body)
        except:
            response.code = 500
            response.add_header('content-type', 'text/plain')
            return "Unable to parse body as JSON"

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    for p in prop.split(","):
        if exists(data, p):
            v = getprop(data, p)
            if isinstance(v, list):
                # Remove whitespace, periods, parens, brackets
                clone = [re.sub("[ \.\(\)\[\]\{\}]", "", s).lower() for s in v]
                # Get index of unique values
                index = list(set([clone.index(s) for s in list(set(clone))]))
            
                setprop(data, p, [v[i] for i in index])

    return json.dumps(data)
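The core of the dedup above (the first occurrence of each normalized value wins) can be exercised in isolation; this standalone illustration is not part of the service itself.

import re

v = ["Foo.", "foo", "Bar (1)", "bar[1]"]
clone = [re.sub(r"[ \.\(\)\[\]\{\}]", "", s).lower() for s in v]
index = list(set(clone.index(s) for s in set(clone)))
print([v[i] for i in index])  # one spelling per normalized value, e.g. ['Foo.', 'Bar (1)']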
Example n. 28
  def _computeConflicts( self ):
    self.conflicts = []
    self.warnings = []
    nonterminalUsageMap = {N: list() for N in self.nonterminals} # maps nonterminal to rules that use this nonterminal in its production

    for R in self.expandedRules:
      for M in R.production.morphemes:
        if isinstance(M, NonTerminal):
          nonterminalUsageMap[M].append(R)

    for N in self.nonterminals:
      if self._empty in self.first[N] and len(self.first[N].intersection(self.follow[N])):
        self.conflicts.append( FirstFollowConflict( N, self.first[N], self.follow[N] ) )

      if not len(nonterminalUsageMap[N]) and not N.generated:
        self.warnings.append(UnusedNonterminalWarning(N))

      NR = self.getExpandedRules( N )
      if len(NR) == 0:
        self.conflicts.append( UndefinedNonterminalConflict(N) )
      for x in range(len(NR)):
        for y in range(len(NR)):
          if x == y:
            continue

          xR = self._pfirst(NR[x].production)
          yR = self._pfirst(NR[y].production)
          intersection = xR.intersection(yR.difference({self._empty}))
          if intersection != set():
            self.conflicts.append( FirstFirstConflict(NR[x], NR[y], self) )
    for macro in self.macros:
      if isinstance(macro, MorphemeListMacro):
        if self.first[macro.morpheme].intersection(self.follow[macro]) != set():
          self.conflicts.append( ListFirstFollowConflict(macro, self.first[macro.nonterminal], self.follow[macro]) )
    return self.conflicts
Example n. 29
    def get_sendable_users(self, project):
        conf_key = self.get_conf_key()

        alert_settings = dict(
            (o.user_id, int(o.value))
            for o in UserOption.objects.filter(
                project=project,
                key='%s:alert' % conf_key,
            )
        )

        disabled = set(u for u, v in alert_settings.iteritems() if v == 0)

        member_set = set(project.member_set.exclude(
            user__in=disabled,
        ).values_list('user', flat=True))

        # determine members default settings
        members_to_check = set(u for u in member_set if u not in alert_settings)
        if members_to_check:
            disabled = set(UserOption.objects.filter(
                key='subscribe_by_default',
                value='0',
                user__in=members_to_check,
            ).values_list('user', flat=True))
            member_set = filter(lambda x: x not in disabled, member_set)

        return member_set
    def test00_pg_hba_conf_file(self):
        os.environ[self.GP_COMMAND_FAULT_POINT] = 'gpexpand tar segment template'

        cmd = Command(name='run gpexpand', cmdStr='gpexpand -D %s -i %s' % (self.TEST_DB, self.EXPANSION_INPUT_FILE))
        with self.assertRaisesRegexp(ExecutionError, 'Fault Injection'):
            cmd.run(validateAfter=True)
        
        #Read from the pg_hba.conf file and ensure that 
        #The address of the new hosts is present.
        cmd = Command(name='get the temp pg_hba.conf file', 
                      cmdStr="ls %s" % os.path.join(os.path.dirname(self.MASTER_DATA_DIRECTORY),
                                                    'gpexpand*',
                                                    'pg_hba.conf'))
        cmd.run(validateAfter=True)
        results = cmd.get_results()
        temp_pg_hba_conf = results.stdout.strip() 

        actual_values = set()
        expected_values = set([self.primary_host_address, self.mirror_host_address])
        with open(temp_pg_hba_conf) as f:
            for line in f:
                if line.strip() == '# %s' % self.primary_host_name or\
                   line.strip() == '# %s' % self.mirror_host_name:
                    address = f.next().strip().split()[3]
                    address = address[:address.rfind('/')]
                    actual_values.add(address)

        self.assertEqual(actual_values, expected_values)

        GpStart(name='start the database in master only mode', masterOnly=True).run(validateAfter=True)
        Command(name='rollback the expansion', cmdStr='gpexpand -r -D %s' % self.TEST_DB).run(validateAfter=True)
        GpStart(name='start the database').run(validateAfter=True)
def gen_mapping(data):
    data_set = set(data)
    return {x: y for x, y in zip(data_set, range(len(data_set)))}
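Example call for gen_mapping(); note that the integer assignment depends on set iteration order, so it is not stable across runs.

print(gen_mapping(["b", "a", "b", "c"]))  # e.g. {'b': 0, 'a': 1, 'c': 2}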
from nltk.stem.lancaster import LancasterStemmer
app = Flask(__name__)

# Config MySQL
app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = '******'
app.config['MYSQL_PASSWORD'] = '******'
app.config['MYSQL_DB'] = 'bass'
app.config['MYSQL_CURSORCLASS'] = 'DictCursor'

# Config Paths
app.config['UPLOAD_PATH'] = "static/uploads/"
app.config['PRODUCT_PATH'] = "static/products/"

# Image extensions allowed
ALLOWED_EXTENSIONS = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'])

# Init MySQL
mysql = MySQL(app)
'''
    ============================================
            RECOMMENDATION SYSTEM CODE STARTS
    ============================================
'''

# ===============================================================
# TRAIN THE ENGINE
# ===============================================================

ds = pd.read_csv("recommendation//WebProducts.csv")
def load_policy(filename):
    def read_layer(l):
        assert list(l.keys()) == ['AffineLayer']
        assert sorted(l['AffineLayer'].keys()) == ['W', 'b']
        W, b = l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32)
        return lambda x: np.matmul(x, W) + b
        
    def build_nonlin_fn(nonlin_type):
        if nonlin_type == 'lrelu':
            leak = 0.01 # openai/imitation nn.py:233
            return lambda x: 0.5 * (1 + leak) * x + 0.5 * (1 - leak) * np.abs(x)
        elif nonlin_type == 'tanh':
            return lambda x: np.tanh(x)
        else:
            raise NotImplementedError(nonlin_type)
    
    with open(filename, 'rb') as f:
        data = pickle.loads(f.read())

    # assert len(data.keys()) == 2
    nonlin_type = data['nonlin_type']
    nonlin_fn = build_nonlin_fn(nonlin_type)
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]

    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
    policy_params = data[policy_type]

    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}
    
    # Build observation normalization layer
    assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
    obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D']
    obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D']
    obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
    #print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)

    
    # Build hidden layers
    assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
    layer_params = policy_params['hidden']['FeedforwardNet']
    layers = []
    for layer_name in sorted(layer_params.keys()):
        l = layer_params[layer_name]
        fc_layer = read_layer(l)
        layers += [fc_layer, nonlin_fn]

    # Build output layer
    fc_layer = read_layer(policy_params['out'])
    layers += [fc_layer]
    layers_forward = lambda inp: reduce(lambda x, fn: fn(x), [inp] + layers)
    
    
    def forward_pass(obs):
        ''' Build the forward pass for policy net.
        Input: batched observation. (shape: [batch_size, obs_dim])
        Output: batched action. (shape: [batch_size, action_dim])
        '''
        obs = obs.astype(np.float32)
        normed_obs = (obs - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation
        output = layers_forward(normed_obs.astype(np.float32))

        return output

    return forward_pass
Example n. 34
    def update(self):

        # Update parameter and experimental widgets
        for p in self._param_widgets:
            p.update()
        for e in self._expt_widgets:
            e.update()

        # Grab connector fit parameters and required meta data associated with
        # this experiment 
        required_meta, connector_param = self._fit.get_experiment_connector(self._experiment)

        # ----------- connector parameters --------------
        if set(connector_param) != self._current_connector_param:

            # Build connector widgets for associated parameters  
            connector_keys = list(connector_param.keys())
            connector_keys.sort()
            to_layout = []
            for k in connector_keys:
            
                try:
                    self._connector_widgets[k].update()
                except KeyError:
                    self._connector_widgets[k] = FitParamWrapper(self,
                                                                 self._fit,
                                                                 self._experiment,
                                                                 connector_param[k])
                to_layout.append(self._connector_widgets[k])

            # Delete any existing connector fit parameter widgets from layout
            widget_indexes = list(range(self._num_local_param_widgets,
                                  self._fit_param_layout.count()))
            widget_indexes.reverse()
            for i in widget_indexes:
                self._fit_param_layout.itemAt(i).widget().setParent(None)

            # Add associated connector fit parameter widgets to layout
            for i, w in enumerate(to_layout): 
 
                r = i + self._num_param_rows

                # Lock down the ability to chose a new linkage for this parameter
                w.set_as_connector_param(True)

                self._fit_param_layout.addWidget(QW.QLabel(w.name),r,0)
                self._fit_param_layout.addWidget(w.guess_widget,r,1)
                self._fit_param_layout.addWidget(w.alias_widget,r,2)
                self._fit_param_layout.addWidget(w.fixed_widget,r,3)
                self._fit_param_layout.addWidget(w.lower_widget,r,4)
                self._fit_param_layout.addWidget(w.upper_widget,r,5)

            self._current_connector_param = set(connector_param)

        # ------------- required experiment metadata -----------------

        if set(required_meta) != self._current_required_meta:
 
            required_meta_keys = list(required_meta.keys())
            required_meta_keys.sort()
            to_layout = [] 
            for m in required_meta_keys:
                try:
                    self._meta_widgets[m].update()
                except KeyError:
                    self._meta_widgets[m] = ExperimentMetaWrapper(self,
                                                                  self._fit,
                                                                  self._experiment,
                                                                  m)
                to_layout.append(self._meta_widgets[m])       

            # Delete existing widgets from layout
            widget_indexes = list(range(self._num_exp_rows*3,
                                        self._experiment_settable_layout.count()))
            widget_indexes.reverse()
            for i in widget_indexes:
               self._experiment_settable_layout.itemAt(i).widget().setParent(None)
            
            # Add dummy widgets to fill out grid 
            hider = QW.QSizePolicy()
            hider.setRetainSizeWhenHidden(True)
            dummies = []
            counter = 0
            while len(to_layout) % self._num_exp_columns != 0:
        
                # Add fake widget
                dummies.append(ExperimentSettableWrapper(self,self._fit,
                                                         self._experiment,
                                                        "dummy{}".format(counter),
                                                        "",str,None))
                dummies[-1].setSizePolicy(hider)
                to_layout.append(dummies[-1])
                to_layout[-1].hide()

                counter += 1

            # Lay out the connector widgets in rows of num_exp_columns.
            counter = 0
            num_rows = int(round((len(to_layout)+1)/self._num_exp_columns))
            for i in range(num_rows):
                r = i + self._num_exp_rows
                for j in range(self._num_exp_columns):
                    self._experiment_settable_layout.addWidget(to_layout[counter],r,j)
                    counter += 1

            self._current_required_meta = set(required_meta)

        # For some reason this must be run twice to get correct size in all cases
        self.adjustSize()
        self.adjustSize()
Example n. 35
def get_product(queries):
    return set(map(frozenset, product(*repeat(tuple(queries), len(queries)))))
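A tiny illustration of get_product(), assuming the module's own itertools imports (product, repeat): for two queries it yields every non-empty combination as a frozenset.

for combo in sorted(get_product(["q1", "q2"]), key=len):
    print(set(combo))
# -> {'q1'}, {'q2'}, {'q1', 'q2'}  (order among same-size sets may vary)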
Example n. 36
def get_cluster_queries(clusters):
    """Based on a collection of clusters (for example those returned by get_clusters()),
    determine the query needed to fetch the articles in that particular cluster.
    """
    all_queries = set(chain.from_iterable(clusters))
    return (_get_cluster_query(all_queries, queries) for queries in clusters)
Example n. 37
n, k = map(int, input().split())
#print(n,k)

nums = list(map(int, input().split()))
nums.sort()

res = []

for i in range(n):
    for j in range(i + 1, n):
        for k2 in range(j + 1, n):
            res.append(nums[i] + nums[j] + nums[k2])

res = list(set(res))
res.sort(reverse=True)

print(res[k - 1])
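An equivalent sketch (not in the original) using itertools.combinations, reusing nums and k from the script above.

from itertools import combinations

res2 = sorted({sum(c) for c in combinations(nums, 3)}, reverse=True)
print(res2[k - 1])  # same k-th largest distinct triple sum as above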
Example n. 38
def users_with_common_genre_interest(identify):
    return set([
        user["id"]
        for user in users
        if user["id"] != identify["id"] and (identify["genre interest"] == user["genre"])
    ])
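The function closes over a module-level users list; the hypothetical data below (ids, keys and values invented here) shows the shape it expects.

users = [
    {"id": 1, "genre": "jazz", "genre interest": "rock"},
    {"id": 2, "genre": "rock", "genre interest": "jazz"},
    {"id": 3, "genre": "rock", "genre interest": "pop"},
]
print(users_with_common_genre_interest(users[0]))  # -> {2, 3}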
Example n. 39
                        for f in sorted(files_child
                                        ):  # sorted to ensure merge stability
                            if f not in intersect:
                                who = gglob_who_orig.copy()
                                globs[os.path.relpath(f,
                                                      start=globs_dir)] = who
                        for who in oglob_who:
                            if who not in gglob_who:
                                gglob_who.append(who)
            add_parent_to_globs(owners.parent, globs, globs_dir)
            return
    assert (False)


todo = owners_data.copy()
done = set()
with open(args.out, 'w') as out:
    out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n')
    out.write('# Uses OWNERS files in different modules throughout the\n')
    out.write('# repository as the source of truth for module ownership.\n')
    written_globs = []
    while todo:
        head, *todo = todo
        if head.parent and not head.parent in done:
            todo.append(head)
            continue
        globs = expand_directives(head.dir, head.directives)
        add_parent_to_globs(head.parent, globs, head.dir)
        for glob, owners in globs.items():
            skip = False
            for glob1, owners1, dir1 in reversed(written_globs):
Example n. 40
def users_with_common_interests_and_genre_interest(user):
    return set([
        interests_and_genre_interest_user_id
        for interests_and_genre_interest_user_id in users_with_common_genre_interest(user)
        if interests_and_genre_interest_user_id in users_with_common_interests(user)
    ])
Example n. 41
#Generate triangle numbers
triNum = []
for i in range(143, 100000):
    x = i * (i + 1) / 2
    triNum.append(x)

#Generate Pentagonal numbers
pentNum = []
for i in range(143, 100000):
    x = i * (3 * i - 1) / 2
    pentNum.append(x)

#Generate hexagonal Numbers
hexNum = []
for i in range(143, 100000):
    x = i * (2 * i - 1)
    hexNum.append(x)

temp = list(set(triNum).intersection(pentNum))
fin = list(set(temp).intersection(hexNum))

print(fin)
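A shortcut sketch (not in the original): every hexagonal number is also triangular (H_n = T_{2n-1}), so intersecting only the hexagonal and pentagonal sets yields the same numbers as fin above.

pent = set(i * (3 * i - 1) // 2 for i in range(143, 100000))
hexa = (i * (2 * i - 1) for i in range(143, 100000))
print(sorted(n for n in hexa if n in pent))  # -> [40755, 1533776805]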
Example n. 42
def read_frame(qs,
               fieldnames=(),
               index_col=None,
               coerce_float=False,
               verbose=True,
               datetime_index=False):
    """
    Returns a dataframe from a QuerySet

    Optionally specify the field names/columns to utilize and
    a field as the index

    Parameters
    ----------

    qs: The Django QuerySet.
    fieldnames: The model field names to use in creating the frame.
         You can span a relationship in the usual Django way
         by using  double underscores to specify a related field
         in another model

    index_col: specify the field to use  for the index. If the index
               field is not in the field list it will be appended

    coerce_float : boolean, default False
        Attempt to convert values to non-string, non-numeric data (like
        decimal.Decimal) to floating point, useful for SQL result sets

    verbose:  boolean If  this is ``True`` then populate the DataFrame with the
                human readable versions of any foreign key fields else use
                the primary keys values.
                The human readable version of the foreign key field is
                defined in the ``__unicode__`` or ``__str__``
                methods of the related class definition

    datetime_index: specify whether index should be converted to a
                    DateTimeIndex.
    """

    if fieldnames:
        fieldnames = pd.unique(fieldnames)
        if index_col is not None and index_col not in fieldnames:
            # Add it to the field names if not already there
            fieldnames = tuple(fieldnames) + (index_col, )
        fields = to_fields(qs, fieldnames)
    elif is_values_queryset(qs):
        if django.VERSION < (1, 9):  # pragma: no cover
            annotation_field_names = list(qs.query.annotation_select)

            if annotation_field_names is None:
                annotation_field_names = []

            extra_field_names = qs.extra_names
            if extra_field_names is None:
                extra_field_names = []

            select_field_names = qs.field_names

        else:  # pragma: no cover
            annotation_field_names = list(qs.query.annotation_select)
            extra_field_names = list(qs.query.extra_select)
            select_field_names = list(qs.query.values_select)

        fieldnames = select_field_names + annotation_field_names + \
            extra_field_names
        fields = [None if '__' in f else qs.model._meta.get_field(f)
                  for f in select_field_names] + \
            [None] * (len(annotation_field_names) + len(extra_field_names))

        uniq_fields = set()
        fieldnames, fields = zip(
            *(f for f in zip(fieldnames, fields)
              if f[0] not in uniq_fields and not uniq_fields.add(f[0])))
    else:
        fields = qs.model._meta.fields
        fieldnames = [f.name for f in fields]
        fieldnames += list(qs.query.annotation_select.keys())

    if is_values_queryset(qs):
        recs = list(qs)
    else:
        recs = list(qs.values_list(*fieldnames))

    df = pd.DataFrame.from_records(recs,
                                   columns=fieldnames,
                                   coerce_float=coerce_float)

    if verbose:
        update_with_verbose(df, fieldnames, fields)

    if index_col is not None:
        df.set_index(index_col, inplace=True)

    if datetime_index:
        df.index = pd.to_datetime(df.index, errors="ignore")
    return df
Example n. 43
 def __init__(self, grams=set(string.hexdigits.lower()), limit=3):
     self.grams = grams
     self.limit = limit
Example n. 44
import sys
import os
import argparse
from collections import namedtuple
from datetime import date, datetime, timedelta
import sqlite3
from foodlog.my_info import config_path

INVALID_TEMPLATE = """ {} {} """

config = config_path()  # pylint: disable=invalid-name
DB_FILE = config.dir('DB_FILE')
MENU_URL = config.dir('MENU_URL')
VIEW_MENU_URL = config.dir('VIEW_MENU_URL')

VALID = set('start end range title reverse edit'.split())
VALID_RANGES = set('today yesterday lastweek thisweek'.split())


def print_error(header, text):

    print(INVALID_TEMPLATE.format(header, text))
    sys.exit(2)


def week_range(num_weeks, firstweekday=3):
    """ Return the range num_weeks ago

        Figure out the week where num_weeks == 0 is this week (contains today)
        and week == 1 is last week, and so on. Weeks are defined by start_day
        using the datetime.weekday(), so if start_day == 0, the week starts on
Example n. 45
def createTree(dataSet, minSup=1):
    """
    Build the FP-tree
    Args:
        dataSet  sample data as dist{row: occurrence count}
        minSup   minimum support
    Returns:
        retTree  FP-tree
        headerTable  elements meeting minSup, as {element: (value, treeNode)}
    """

    # dist{element: occurrence count} for elements with support >= minSup
    headerTable = {}

    # Loop over the sample data dist{row: occurrence count}
    for trans in dataSet:
        # Iterate over every row to get all of its elements,
        # accumulating the total occurrence count of each element
        for item in trans:
            # e.g. {'ababa': 3}: count(a)=3+3+3=9, count(b)=3+3=6
            headerTable[item] = headerTable.get(item, 0) + dataSet[trans]

    # Delete elements from headerTable whose count is below the minimum support
    for k in list(headerTable.keys()):  # in Python 3, .keys() returns a view, not a list, so it cannot be modified while iterating
        if headerTable[k] < minSup:
            del(headerTable[k])

    # Elements meeting minSup: set(all frequent elements)
    freqItemSet = set(headerTable.keys())

    # If there are none, return None immediately
    if len(freqItemSet) == 0:
        return None, None

    for k in headerTable:
        # Reformat: dist{element key: [element count, None]}
        headerTable[k] = [headerTable[k], None]

    # Tree root
    retTree = treeNode('Null Set', 1, None)

    # Loop over the sample data dist{row: occurrence count}
    for tranSet, count in dataSet.items():
        # localD = dist{element key: total occurrence count}
        localD = {}
        for item in tranSet:
            # Keep only items in the minSup-qualifying set
            if item in freqItemSet:
                localD[item] = headerTable[item][0]

        # Sort each row's keys, then grow the tree branch by branch until it is filled out.
        # When the same prefix shows up again, its branch count is incremented and the recursion continues.
        if len(localD) > 0:
            # p = (key, value); sort by the value, from largest to smallest
            # orderedItems holds the tuple keys (the items themselves), ordered by descending count
            orderedItems = [v[0] for v in sorted(localD.items(), key=lambda p: p[1], reverse=True)]

            # Populate the tree: insert the ordered items in sequence, starting with the first as a child of the root
            updateTree(orderedItems, retTree, headerTable, count)

    return retTree, headerTable
def extract_custom_form_info(req_id, form_id, form_soup):
    """Extract all of the fields passed into the form.

    Arguments:
        req_id (String):
            The unique string of ints that map to a request (URI).
        form_id (String):
            The unique string of ints that map to a form.
        form_soup (BeautifulSoup object):
            The soup of the form you want to parse.

    Returns:
        form_info (CustomForm):
            The CustomForm object with all of the form's fields initialized.

    Raises:
        TypeError:
            The form has no fields configured.
        ValueError:
            The form has duplicate samples.
    """

    # If we need any of these types, we can make new methods.
    skip_types = ["charges", "file", "table", "help", "file_no_upload"]
    field_strategy = {
        "handsontable_grid": extract_custom_forms.grid_type,
        "checkbox": extract_custom_forms.checkbox_type,
        "all_others": extract_custom_forms.all_other_types
    }

    # Find the desired custom form out of all of the form_soup.
    target_form = form_soup.find(string=form_id)
    target_form = target_form.find_parent("custom-form")
    form_soup = target_form

    form_name = form_soup.find("name").string
    fields_soup = form_soup.find("fields")
    form_info = api_types.CustomForm(form_name, req_id, form_id)

    # Get all of the field information.
    for field_soup in fields_soup.find_all("field"):
        field_type = field_soup.find("type").string
        if field_type in skip_types:
            # Do nothing with the field types that we don't yet care about.
            continue

        try:
            field_strategy[field_type](field_soup, form_info)
        except KeyError:
            field_strategy["all_others"](field_soup, form_info)
        except TypeError:
            raise TypeError(
                f"The grid in the {form_info.name} form in request"
                f" {form_info.req_id} has been filled out incorrectly. The"
                f" error message is: {traceback.format_exc()}")

    # Return early if the form has no samples.
    if not form_info.samples:
        return form_info

    if form_info.field_to_values.get("duplicate_samples"):
        if form_info.field_to_values["duplicate_samples"] == "Yes":
            b_samples = copy.deepcopy(form_info.samples)
            for a_sample, b_sample in zip(form_info.samples, b_samples):
                a_sample.name += "A"
                b_sample.name += "B"
            form_info.samples = form_info.samples + b_samples

    extract_custom_forms.bind_container_info(form_info)

    # Allows duplicate names if they have different well locations in a
    # plate.
    if form_info.con_type != "96 well plate":
        sample_names = [sample.name for sample in form_info.samples]
        if len(set(sample_names)) != len(sample_names):
            raise ValueError(
                f"There are two or more samples named the same thing in"
                f" request {form_info.req_id}. Please review and edit your"
                f" sample names.")

    for name, value in form_info.field_to_values.items():
        if name in ONLY_INT_FIELDS:
            value = re.sub(r"[^.0-9]", "", value)
        if "_each_sample" in name:
            udf_name = name.replace("_each_sample", "").replace("_", " ")
            for sample in form_info.samples:
                sample.udf_to_value[udf_name] = value

    return form_info
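The field_strategy dispatch above depends on project-specific handlers in extract_custom_forms, which are not shown. Here is a self-contained, hypothetical sketch of the same strategy-dict pattern with a KeyError fallback; none of these names come from the original project.

# Hypothetical illustration of dispatch-by-field-type with a catch-all handler.
def handle_checkbox(field, form):
    form.setdefault("checkboxes", []).append(field["name"])

def handle_default(field, form):
    form.setdefault("other", []).append(field["name"])

strategy = {
    "checkbox": handle_checkbox,
    "all_others": handle_default,
}

form = {}
for field in [{"type": "checkbox", "name": "agree"},
              {"type": "text", "name": "comment"}]:
    try:
        strategy[field["type"]](field, form)
    except KeyError:
        # Unknown field type: fall back to the generic handler.
        strategy["all_others"](field, form)

# form == {'checkboxes': ['agree'], 'other': ['comment']}

Using strategy.get(field_type, strategy["all_others"]) instead of try/except would avoid the subtle case where a KeyError raised inside a handler is mistaken for an unknown field type.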
Esempio n. 47
0
    def keys(self):
        res = set()
        for key in chain_from_iterable(self._maps):
            res.add(key)

        return list(res)
Esempio n. 48
0
# coding: utf-8
import sys, json, codecs
reload(sys)
sys.setdefaultencoding('utf-8')

fin = codecs.open("P_list", encoding="utf-8")
small_train_data_P_set = set([])
for line in fin:
    small_train_data_P_set.add(line.strip())
fin.close()

# For candidates labeled 0, drop the neg:far_apart rule
def main(in_file, to_file):
    fin = open(in_file)
    fout = open(to_file, "w")

    for line in fin:
        line_list = line.strip().split("\t")
        dict_label_info = json.loads(line_list[-1])

        flag = 0

        for P in dict_label_info:
            # Only handle the predicates P we care about
            if P in small_train_data_P_set:
                # Iterate over the candidates
                for s_o in dict_label_info[P]["candidates"]:
                    label = dict_label_info[P]["candidates"][s_o]["label"]
                    label_rule_list = dict_label_info[P]["candidates"][s_o]["label_info"]
                    # NULL-type candidate: regenerate the label
                    if label == 0:
Esempio n. 49
0
# -*- coding: utf-8 -*-

# read puzzle input
puzzle_input = []
all_allergens = []
all_ingredients = []
for l in open("input_test.txt", "r").readlines():
    ingredients, allergens = l.split(" (contains ")
    ingredients = ingredients.split(" ")
    all_ingredients.extend(ingredients)
    allergens = allergens.strip()
    allergens = allergens[:-1].split(", ")
    all_allergens.extend(allergens)
    puzzle_input.append([ingredients, allergens])

all_allergens = set(all_allergens)
all_ingredients = set(all_ingredients)

ingredients_dict = {}
for ingredient in all_ingredients:
    ingredients_dict[ingredient] = []
    for food in puzzle_input:
        if ingredient in food[0]:
            ingredients_dict[ingredient].extend(food[1])

allergens_dict = {}
for allergen in all_allergens:
    allergens_dict[allergen] = []
    for food in puzzle_input:
        if allergen in food[1]:
            allergens_dict[allergen].append(food[0])
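The example is cut off after allergens_dict is built. One plausible continuation, sketched here as an assumption rather than the author's actual code: each allergen can only hide in the ingredients shared by every food that lists it.

# For each allergen, candidate ingredients are those present in every food
# that mentions the allergen (intersection of the ingredient lists).
candidates = {}
for allergen, foods in allergens_dict.items():
    common = set(foods[0])
    for ingredients in foods[1:]:
        common &= set(ingredients)
    candidates[allergen] = common

# Ingredients that appear in no candidate set cannot contain any allergen.
safe = all_ingredients - set().union(*candidates.values())
print(len(safe), "ingredients cannot contain an allergen")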
Esempio n. 50
0
    def _run_test(self, extr, url, result):
        if result:
            if "options" in result:
                for key, value in result["options"]:
                    key = key.split(".")
                    config.set(key[:-1], key[-1], value)
            if "range" in result:
                config.set((), "image-range"  , result["range"])
                config.set((), "chapter-range", result["range"])
            content = "content" in result
        else:
            content = False

        tjob = ResultJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            with self.assertRaises(result["exception"]):
                tjob.run()
            return
        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            exc = str(exc)
            if re.match(r"'5\d\d ", exc) or \
                    re.search(r"\bRead timed out\b", exc):
                self._skipped.append((url, exc))
                self.skipTest(exc)
            raise

        if result.get("archive", True):
            self.assertEqual(
                len(set(tjob.archive_list)),
                len(tjob.archive_list),
                "archive-id uniqueness",
            )

        if tjob.queue:
            # test '_extractor' entries
            for url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
                if "_extractor" in kwdict:
                    extr = kwdict["_extractor"].from_url(url)
                    self.assertIsInstance(extr, kwdict["_extractor"])
                    self.assertEqual(extr.url, url)
        else:
            # test 'extension' entries
            for kwdict in tjob.kwdict_list:
                self.assertIn("extension", kwdict)

        # test extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.url_hash.hexdigest())

        if "content" in result:
            expected = result["content"]
            digest = tjob.content_hash.hexdigest()
            if isinstance(expected, str):
                self.assertEqual(digest, expected, "content")
            else:  # assume iterable
                self.assertIn(digest, expected, "content")

        if "keyword" in result:
            expected = result["keyword"]
            if isinstance(expected, dict):
                for kwdict in tjob.kwdict_list:
                    self._test_kwdict(kwdict, expected)
            else:  # assume SHA1 hash
                self.assertEqual(expected, tjob.kwdict_hash.hexdigest())

        if "count" in result:
            count = result["count"]
            if isinstance(count, str):
                self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.url_list), count)
                self.assertTrue(eval(expr), msg=expr)
            else:  # assume integer
                self.assertEqual(len(tjob.url_list), count)

        if "pattern" in result:
            self.assertGreater(len(tjob.url_list), 0)
            for url in tjob.url_list:
                self.assertRegex(url, result["pattern"])
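For reference, the keys read above ("options", "range", "exception", "archive", "url", "content", "keyword", "count", "pattern") imply a result dict roughly like the following; this is an illustrative assumption, not an actual test case from the suite.

# Hypothetical test definition consumed by _run_test: every key is optional.
result = {
    "options": [("module.option", "value")],   # dotted config keys set before the run
    "range": "1-10",                           # applied to both image-range and chapter-range
    "pattern": r"^https://example\.org/file/\d+\.jpg$",
    "count": ">= 5",                           # either an int or a comparison string
    "keyword": {"title": str},                 # expected kwdict entries (or a SHA1 hash)
}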
Esempio n. 51
0
    def __init__(self, tokens):
        """Build our smoothed trigram model.

        This should be very similar to SimpleTrigramLM.__init__ from the demo
        notebook, with the exception that we _don't_ want to actually normalize
        the probabilities at training time. Instead, we'll compute the corpus
        counts C_abc = C(w_2, w_1, w) and C_ab = C(w_2, w_1), after which we can
        compute the probabilities on the fly for any value of k. (We'll do this
        in the next_word_proba() function.)

        The starter code will fill in:
          self.counts  (trigram counts)
          self.words   (list of words known to the model)

        Your code should populate:
          self.context_totals (total count C_ab for context ab)

        Args:
          tokens: (list or np.array) of training tokens

        Returns:
          None
        """
        self.k = 0.0
        # Raw trigram counts over the corpus.
        # c(w | w_1 w_2) = self.counts[(w_2,w_1)][w]
        # Be sure to use tuples (w_2,w_1) as keys, *not* lists [w_2,w_1]
        self.counts = defaultdict(lambda: defaultdict(lambda: 0.0))

        # Map of (w_1, w_2) -> int
        # Entries are c( w_2, w_1 ) = sum_w c(w_2, w_1, w)
        self.context_totals = defaultdict(lambda: 0.0)

        # Track unique words seen, for normalization
        # Use wordset.add(word) to add words
        wordset = set()

        # Iterate through the word stream once
        # Compute trigram counts as in SimpleTrigramLM
        w_1, w_2 = None, None
        for word in tokens:
            wordset.add(word)
            if w_1 is not None and w_2 is not None:
                self.counts[(w_2,w_1)][word] += 1
            # Update context
            w_2 = w_1
            w_1 = word

        #### YOUR CODE HERE ####
        # Compute context counts
        for context, words in self.counts.items():
            self.context_totals[context] = sum(words[w] for w in words)

        #### END(YOUR CODE) ####
        # Freeze defaultdicts so we don't accidentally modify later.
        self.counts.default_factory = None
        for k in self.counts:
            if isinstance(self.counts[k], defaultdict):
                self.counts[k].default_factory = None

        # Total vocabulary size, for normalization
        self.words = list(wordset)
        self.V = len(self.words)
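The docstring defers normalization to a next_word_proba() method. Below is a minimal sketch of that method under add-k smoothing, using only the quantities stored above; the exact signature in the original assignment may differ.

    def next_word_proba(self, word, seq):
        """P(word | seq[-2], seq[-1]) with add-k smoothing (hypothetical sketch)."""
        context = tuple(seq[-2:])  # assumes at least two preceding tokens
        k = self.k
        c_abc = self.counts.get(context, {}).get(word, 0.0)
        c_ab = self.context_totals.get(context, 0.0)
        # Add-k estimate: (C(w_2, w_1, w) + k) / (C(w_2, w_1) + k * V).
        # With k == 0 and an unseen context this divides by zero, so use k > 0.
        return (c_abc + k) / (c_ab + k * self.V)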
Esempio n. 52
0
import spacy
import json
import os
from collections import defaultdict

nlp = spacy.load('en')
PATH = os.getcwd()
sentences = []


with open(PATH + '\\data\\proc_sen.json', 'r+') as fp:
    proc_sen = json.load(fp)

uniq_words = set()
count_lemma = defaultdict(int)

c = 0
for sen in proc_sen:
    c += 1
    if c % 100 == 0:
        print(c)
    for token in sen:
        uniq_words.add(token[1])

with open(PATH + '\\data\\uniq_words.json', 'w+') as fp:
    json.dump(list(uniq_words), fp)

with open(PATH + '\\data\\uniq_words.txt', 'w+') as fp:
    for w in uniq_words:
        fp.write(w + '\n')
Esempio n. 53
0
STOP_WORDS = set([
    "i", "me", "my", "myself", "we", "our", "ours", "ourself", "ourselves",
    "you", "your", "yours", "yourself", "yourselves", "he", "him", "his",
    "himself", "she", "her", "herself", "it", "its", "itself", "they", "them",
    "their", "themselves", "what", "which", "who", "whom", "this", "that",
    "these", "those", "am", "is", "are", "was", "were", "be", "been", "have",
    "has", "had", "do", "does", "did", "a", "an", "the", "and", "but", "if",
    "or", "because", "as", "until", "while", "of", "at", "by", "for", "with",
    "about", "against", "between", "into", "through", "during", "before",
    "after", "above", "below", "to", "from", "up", "down", "in", "out", "on",
    "off", "over", "under", "again", "further", "then", "once", "here",
    "there", "when", "where", "why", "how", "all", "any", "both", "each",
    "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "can", "will", "just", "don",
    "should", "now", "say", "tell", "told", "said", "would", "could", "might",
    "shall", "nt", "also", "L:", "P:", "O:", "s", "t", "m", "re", "ll", "d"
])
#STOP_WORDS = ["i", "me", "my", "myself", "we", "our", "ourselves", "you", "your", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "herself", "it", "itself", "they", "them", "their", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "have", "has", "had", "do", "does", "did", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will" , "just", "don", "should", "now", "said", "would", "nt","&"]

WORD2NUM = {
    'one': '1',
    'two': '2',
    'three': '3',
    'four': '4',
    'five': '5',
    'six': '6',
    'seven': '7',
    'eight': '8',
    'nine': '9',
    'zero': '0'
}
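A small, purely illustrative usage of the two structures above: drop stopwords from a token list and map spelled-out digits to numerals.

# Illustrative only: filter stopwords, then normalize number words.
tokens = ["tell", "me", "the", "first", "two", "answers"]
filtered = [t for t in tokens if t.lower() not in STOP_WORDS]
print([WORD2NUM.get(t, t) for t in filtered])  # ['first', '2', 'answers']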
Esempio n. 54
0
    def __init__(self, tokens):
        """Build our smoothed trigram model.

        This should be similar to the AddKTrigramLM.__init__ function, above,
        but will compute a number of additional quantities that we need for the
        more sophisticated KN model.

        See the documentation in the notebook for the KN backoff model
        definition and equations, and be sure to read the in-line comments
        carefully to understand what each data structure represents.

        Note the usual identification of variables:
          w : c : current word
          w_1 : w_{i-1} : b : previous word
          w_2 : w_{i-2} : a : previous-previous word

        There are two blocks of code to fill here. In the first one, you should
        fill in the inner loop to compute:
          self.counts         (unigram, bigram, and trigram)
          self.type_contexts  (set of preceding words for each word (type))

        In the second one, you should compute:
          self.context_totals  (as in AddKTrigramLM)
          self.context_nnz     (number of nonzero elements for each context)
          self.type_fertility  (number of unique preceding words for each word
                                      (type))

        The starter code will fill in:
          self.z_tf   (normalization constant for type fertilities)
          self.words  (list of words known to the model)

        Args:
          tokens: (list or np.array) of training tokens

        Returns:
          None
        """
        self.delta = 0.75
        # Raw counts over the corpus.
        # Keys are context (N-1)-grams, values are dicts of word -> count.
        # You can access C(w | w_{i-1}, ...) as:
        # unigram: self.counts[()][w]
        # bigram:  self.counts[(w_1,)][w]
        # trigram: self.counts[(w_2,w_1)][w]
        self.counts = defaultdict(lambda: defaultdict(lambda: 0))
        # As in AddKTrigramLM, but also store the unigram and bigram counts
        # self.context_totals[()] = (total word count)
        # self.context_totals[(w_1,)] = c(w_1)
        # self.context_totals[(w_2, w_1)] = c(w_2, w_1)
        self.context_totals = defaultdict(lambda: 0.0)
        # Also store in self.context_nnz the number of nonzero entries for each
        # context; as long as \delta < 1 this is equal to nnz(context) as
        # defined in the notebook.
        self.context_nnz = defaultdict(lambda: 0.0)

        # Context types: store the set of preceding words for each word
        # map word -> {preceding_types}
        self.type_contexts = defaultdict(lambda: set())
        # Type fertility is the size of the set above
        # map word -> |preceding_types|
        self.type_fertility = defaultdict(lambda: 0.0)
        # z_tf is the sum of type fertilities
        self.z_tf = 0.0


        # Iterate through the word stream once
        # Compute unigram, bigram, trigram counts and type fertilities
        w_1, w_2 = None, None
        for word in tokens:
            
            #### YOUR CODE HERE ####
            
            # Unigram counts
            self.counts[()][word] += 1
            
            if w_1 is not None:
                # Bigram counts
                self.counts[(w_1,)][word] += 1
                
                # Unique context words for each word
                self.type_contexts[word].add(w_1)
                
                if w_2 is not None:
                    # Trigram counts
                    self.counts[(w_2,w_1)][word] += 1
            
            #### END(YOUR CODE) ####
            
            # Update context
            w_2 = w_1
            w_1 = word
            
        ##
        # We'll compute type fertilities and normalization constants now,
        # but not actually store the normalized probabilities. That way, we can compute
        # them (efficiently) on the fly.

        #### YOUR CODE HERE ####
        # For each context, compute the total count and the number of
        # nonzero entries (continuations actually observed in that context).
        for context, words in self.counts.items():
            self.context_totals[context] = sum(words[w] for w in words)
            for word, cnt in words.items():
                if cnt > self.delta:
                    self.context_nnz[context] += 1

        # Compute type fertilities (number of distinct preceding word types), and the sum z_tf.
        for word, contexts in self.type_contexts.items():
            self.type_fertility[word] = len(contexts)
            
        self.z_tf = float(sum(self.type_fertility.values()))
        #### END(YOUR CODE) ####


        # Freeze defaultdicts so we don't accidentally modify later.
        self.counts.default_factory = None
        self.type_contexts.default_factory = None

        # Total vocabulary size, for normalization
        self.words = list(self.counts[()].keys())
        self.V = len(self.words)
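A sketch of how the stored quantities might combine into the interpolated absolute-discounting (KN-style) probability. The exact equations live in the course notebook, so treat the formula below as an assumption rather than the official solution.

    def kn_interp(self, word, context):
        """Hypothetical P_kn(word | context) for context = (), (w_1,) or (w_2, w_1):
        absolute discounting with recursive backoff; the base case is the
        type-fertility distribution tf(word) / z_tf."""
        if len(context) == 0:
            return self.type_fertility.get(word, 0.0) / self.z_tf
        c_total = self.context_totals.get(context, 0.0)
        shorter = context[1:]
        if c_total == 0:
            # Nothing observed in this context: back off entirely.
            return self.kn_interp(word, shorter)
        c_word = self.counts.get(context, {}).get(word, 0)
        discounted = max(c_word - self.delta, 0.0) / c_total
        # Mass freed by discounting the nnz(context) observed continuations
        # is redistributed according to the lower-order estimate.
        backoff_weight = self.delta * self.context_nnz.get(context, 0.0) / c_total
        return discounted + backoff_weight * self.kn_interp(word, shorter)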
Esempio n. 55
0
 def __init__(self, func, _type):
     super().__init__(func, "on_cap_{}".format(_type))
     self.caps = set()
Esempio n. 56
0
 def __init__(self, url, dirname, secret):
     self.url = url
     self.dirname = dirname
     self.secret = secret
     self.known = set()
Esempio n. 57
0
 def __init__(self, function):
     """
     :type function: function
     """
     _Hook.__init__(self, function, "irc_raw")
     self.triggers = set()
Esempio n. 58
0
 def __init__(self, func):
     super().__init__(func, "perm_check")
     self.perms = set()
Esempio n. 59
0
class JsonLexer(Lexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Text, text[start:stop]))
                else:
                    yield start, Text, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n',
                               't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    if _token is Text:
                        yield _start, _token, _text
                    elif _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, Error, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Text, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
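A quick usage sketch for the lexer above; it assumes the usual Pygments imports (Lexer and the token types) are present in the module, as in the original source.

# Illustrative: tokenize a small JSON document with the lexer defined above.
sample = '{"name": "value", "nums": [1, 2.5], "ok": true}'
for index, token, text in JsonLexer().get_tokens_unprocessed(sample):
    print(index, token, repr(text))
# Object keys come out as Name.Tag, string values as String.Double, numbers as
# Number.Integer / Number.Float, and true/false/null as Keyword.Constant.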
Esempio n. 60
0
 def __init__(self, function):
     """
     :type function: function
     """
     _Hook.__init__(self, function, "event")
     self.types = set()