Example #1
    def draw(self, renderer):
        ### FIXME this should be made faster (C++ Module? How to deal with C++ linkage problems?)
        ### Sticking the vtkPoints objects in a cache would help somewhat but not on the first view.
        ### - Jack
        if not self.drawn:
            vtk_points = vtkPoints()
            points = self.visualiser.getQuantityPoints(self.quantityName, dynamic=self.dynamic)
            nPoints = len(points)
            vtk_points.SetNumberOfPoints(nPoints)
            setPoint = vtkPoints.SetPoint    # cache the unbound method to avoid per-iteration attribute lookup
            for i in xrange(nPoints):
                z = points[i] * self.zScale + self.offset
                setPoint(vtk_points, i, self.visualiser.xPoints[i], self.visualiser.yPoints[i], z)

            polyData = vtkPolyData()
            polyData.SetPoints(vtk_points)
            polyData.SetPolys(self.visualiser.vtk_cells)
            mapper = vtkPolyDataMapper()
            mapper.SetInput(polyData)
            setValue = vtkFloatArray.SetValue    # same unbound-method caching trick as setPoint above
            if hasattr(self.colour[0], '__call__'):
                scalars = self.colour[0](self.visualiser.getQuantityDict())
                nScalars = len(scalars)
                vtk_scalars = vtkFloatArray()
                vtk_scalars.SetNumberOfValues(nScalars)
                for i in xrange(nScalars):
                    setValue(vtk_scalars, i, scalars[i])
                polyData.GetPointData().SetScalars(vtk_scalars)
                mapper.SetScalarRange(self.colour[1:3])
            mapper.Update()
            self.actor.SetMapper(mapper)
        Feature.draw(self, renderer)
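Example #1 follows the standard VTK pipeline: points plus cells feed a vtkPolyData, the polydata feeds a mapper, and the mapper feeds the actor. A stripped-down sketch of that wiring (VTK 5-era API to match the snippet's SetInput call; on VTK >= 6 the call is SetInputData):

from vtk import vtkActor, vtkPoints, vtkPolyData, vtkPolyDataMapper

points = vtkPoints()
points.InsertNextPoint(0.0, 0.0, 1.0)
poly = vtkPolyData()
poly.SetPoints(points)          # geometry; real code also needs SetPolys(cells)
mapper = vtkPolyDataMapper()
mapper.SetInput(poly)           # VTK 5 API; SetInputData(poly) on VTK >= 6
actor = vtkActor()
actor.SetMapper(mapper)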
Example #2
 def get_score(self, msg):
     Feature.extract(msg)
     score = 0.0
     for (f, w) in self.feature_weight.items():
         if f in msg.feature:
             score += msg.feature[f] * w
     return score
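Example #2 computes a plain linear score: each extracted feature value is multiplied by its weight and summed. The same idea with plain dicts standing in for the msg object (names here are illustrative only):

feature_weight = {'has_url': 1.5, 'num_exclaims': 0.3}
msg_feature = {'has_url': 1.0, 'num_exclaims': 4.0}
score = sum(msg_feature[f] * w
            for f, w in feature_weight.items()
            if f in msg_feature)
print(score)  # 1.5*1.0 + 0.3*4.0 = 2.7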
Example #3
def init_train_data(fnames, topics):
  print ('[ init_train_data ] =================')
  # amap
  # key   : aid
  # value : attr[0] preference, attr[1] aid, attr[2] aname

  train_rank = []
  for QID in range(len(topics)):
    fname = fnames[QID]
    topic = topics[QID]

    amap = filter_data(fname)
    fea = Feature(topic)

    ext_aids = ZC.get_raw_rank(topic, EXT_TRAIN_A_SIZE)
    print '[ init_train_data ] amap_1 size = %d ' %(len(amap))

    for tid in ext_aids:
      if tid not in amap:
        amap[tid] = (0, tid, '')

    print '[ init_train_data ] amap_2 size = %d ' %(len(amap))

    for tid in amap:
      fv = fea.get_feature_vector(tid)
      #print ('[ init_train_data ] %d get feature vector ok.' %(tid))
      train_rank.append((int(amap[tid][0]), reform_vector(fv), QID))

    print '[ init_train_data ]  topic : %s ok , train_rank_size = %d' %(topic, len(train_rank))
    ZC.dump_cache()

  with open('train_rank.dat', 'w') as f:
    pprint.pprint(train_rank, f)

  return train_rank
Example #4
def Initialize(credentials=None, opt_url=None):
  """Initialize the EE library.

  If this hasn't been called by the time any object constructor is used,
  it will be called then.  If this is called a second time with a different
  URL, this does not un-initialize e.g. the previously loaded
  Algorithms, but will overwrite them and let them point at alternate servers.

  Args:
    credentials: OAuth2 credentials.
    opt_url: The base url for the EarthEngine REST API to connect to.
  """
  data.initialize(credentials, (opt_url + '/api' if opt_url else None), opt_url)
  # Initialize the dynamically loaded functions on the objects that want them.
  ApiFunction.initialize()
  Element.initialize()
  Image.initialize()
  Feature.initialize()
  Collection.initialize()
  ImageCollection.initialize()
  FeatureCollection.initialize()
  Filter.initialize()
  Geometry.initialize()
  List.initialize()
  Number.initialize()
  String.initialize()
  Date.initialize()
  Dictionary.initialize()
  _InitializeGeneratedClasses()
  _InitializeUnboundMethods()
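In the released earthengine-api package this function is exposed as ee.Initialize(). A typical client session (a sketch against a newer API than the snippet above: ee.Authenticate() is the credential helper in current releases):

import ee

ee.Authenticate()  # interactive OAuth2 flow; stores persistent credentials
ee.Initialize()    # wires up data and the ee.* classes initialized above
img = ee.Image('CGIAR/SRTM90_V4')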
Example #5
def dryer_data2(*feature_names):
	# data[area][genus][(feature_values)] = language_count
	data = {}
	# Languages that all features have
	languages = set()
	
	g = Genealogy()
	feature = Feature(feature_names[0])
	
	for language in feature.languages():
		languages.add(language.code)
	
	for feature_name in feature_names:
		feature = Feature(feature_name)
		this_set = set()
		for language in feature.languages():
			this_set.add(language.code)
		
		languages &= this_set
	
	for language_code in languages:
		language = g.find_language_by_code(language_code)
		area = language.area
		genus = language.genus.name
		value = ','.join(v['description'] for v in sorted(language.features.values()))
		
		data.setdefault(area, {})
		data[area].setdefault(genus, {})
		data[area][genus].setdefault(value, 0)
		data[area][genus][value] += 1
	
	return data
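The three chained setdefault calls build a nested counter keyed by area, genus, and feature-value string. An equivalent sketch using collections.defaultdict, which removes the setdefault boilerplate:

from collections import defaultdict

# data[area][genus][value] -> language count
data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
data['Africa']['Bantoid']['SOV,Postpositions'] += 1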
Example #6
def Initialize(credentials="persistent", opt_url=None):
    """Initialize the EE library.

  If this hasn't been called by the time any object constructor is used,
  it will be called then.  If this is called a second time with a different
  URL, this does not un-initialize e.g. the previously loaded
  Algorithms, but will overwrite them and let them point at alternate servers.

  Args:
    credentials: OAuth2 credentials.  'persistent' (default) means use
        credentials already stored in the filesystem, or raise an explanatory
        exception guiding the user to create those credentials.
    opt_url: The base url for the EarthEngine REST API to connect to.
  """
    if credentials == "persistent":
        credentials = _GetPersistentCredentials()
    data.initialize(credentials, (opt_url + "/api" if opt_url else None), opt_url)
    # Initialize the dynamically loaded functions on the objects that want them.
    ApiFunction.initialize()
    Element.initialize()
    Image.initialize()
    Feature.initialize()
    Collection.initialize()
    ImageCollection.initialize()
    FeatureCollection.initialize()
    Filter.initialize()
    Geometry.initialize()
    List.initialize()
    Number.initialize()
    String.initialize()
    Date.initialize()
    Dictionary.initialize()
    Terrain.initialize()
    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
Example #7
    def extract(self, data_set_name, part_num=1, part_id=0):
        """
        Extract the feature from the original data set
        :param data_set_name: name of the data set
        :param part_num: number of partitions of the data
        :param part_id: ID of the partition to extract
        :return:
        """
        # load data set from disk
        data = pd.read_csv('%s/%s.csv' % (self.config.get('DEFAULT', 'source_pt'), data_set_name)).fillna(value="")
        begin_id = int(1. * len(data) / part_num * part_id)
        end_id = int(1. * len(data) / part_num * (part_id + 1))

        # set feature file path
        feature_pt = self.config.get('DEFAULT', 'feature_pt')
        if 1 == part_num:
            self.data_feature_fp = '%s/%s.%s.smat' % (feature_pt, self.feature_name, data_set_name)
        else:
            self.data_feature_fp = '%s/%s.%s.smat.%03d_%03d' % (feature_pt,
                                                                self.feature_name,
                                                                data_set_name,
                                                                part_num,
                                                                part_id)

        feature_file = open(self.data_feature_fp, 'w')
        feature_file.write('%d %d\n' % (end_id - begin_id, int(self.get_feature_num())))
        # extract feature
        for index, row in data[begin_id:end_id].iterrows():
            feature = self.extract_row(row)
            Feature.save_feature(feature, feature_file)
        feature_file.close()

        LogUtil.log('INFO',
                    'save features (%s, %s, %d, %d) done' % (self.feature_name, data_set_name, part_num, part_id))
Example #8
    def __init__(self, **kwargs):
        u"""
        :param framefilter: frame numbers selected for computing the
            histogram
        :type: array
        """
        Feature.__init__(self, **kwargs)
Example #9
def predict(test_dir, xpath):
  feature = Feature(test_dir + '/oracle.png', test_dir + '/test.html', xpath)
  feature.process()
  vector = feature.output_binary()
  print vector
  score = rank_bayes(vecter)
  sorted_score = sorted(score.iteritems(), key=operator.itemgetter(1))
  return sorted_score
Example #10
    def test_can_enable_two_features(self):
        feature2 = Feature("second_test_feature")
        request = fake_request()

        self.feature.enable(request)
        feature2.enable(request)

        self.assertTrue(self.feature.is_enabled(request))
        self.assertTrue(feature2.is_enabled(request))
Example #11
 def _weight_feature(self, msg):
     Feature.extract(msg)
     score = 0.0
     for f, w in zip(self.feature_name, self.w):
         if f in msg.feature:
             score += msg.feature[f] * w
     return score
Example #12
def init_rerank_data(aids, topic):
  QID = 1
  fea = Feature(topic)
  rerank_data = []
  for tid in aids:
    fv = fea.get_feature_vector(tid)
    print ('[ init_rerank_data ] %d get feature vector ok.' %(tid))
    rerank_data.append((tid, reform_vector(fv), QID))

  return rerank_data
Example #13
def set_enabled(request):
    f = Feature(request.POST['name'])
    enabled = request.POST['enabled'] == 'True'

    if enabled:
        f.enable(request)
    else:
        f.disable(request)

    return redirect("/feature/")
Example #14
    def _test_polynomial(self, polynomial):
        data = range(10)
        data = [float(d) for d in data]
        targets = [d**polynomial for d in data]
        data = [[d] for d in data]

        feature = Feature(Objective.MINIMIZE, log_level=logging.WARN)
        feature.optimize(data, targets)

        assert feature.polynomial == polynomial, "{0} != {1}".format(feature.polynomial, polynomial)
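Concretely, for polynomial == 2 the fixture above is ten points on y = x**2, so the assertion checks that the optimizer recovers the generating degree:

data = [[float(d)] for d in range(10)]  # [[0.0], [1.0], ..., [9.0]]
targets = [d[0] ** 2 for d in data]     # [0.0, 1.0, 4.0, ..., 81.0]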
Example #15
 def __init__(self, biodb, step_size=5, levels=[], name_hier=[]):
     Feature.__init__(self, biodb=biodb)
     self.step_size = step_size

     if levels == []:
         levels = [None]*3
     self.levels = levels
     if name_hier == []:
         name_hier = [""]*3
     self.name_hier = name_hier
     self.links = []
Example #16
 def __init__(self, biodb, step_size=5, levels=[], name_hier=[], parent_hier=[]):
     Feature.__init__(self, biodb=biodb)
     self.step_size = step_size

     if parent_hier == []:
         parent_hier = [None]*3
     self.parent_hier = parent_hier

     if levels == []:
         levels = [None]*3
     self.levels = levels
     self.links = []
Example #17
def showResults(request):
    global QUERY
    global RET_ANS
    query = request.GET['query']
    query = query.encode('UTF-8')
    if query == QUERY:
        return JsonResponse(RET_ANS, safe=False)
    else:
        QUERY = query
        #words = jieba.cut_for_search(query) # search-engine-mode segmentation
        ch_q = jieba.cut(query) # precise-mode segmentation
        kw_ch = [i for i in ch_q]
        tag_obj = TagDict.objects.filter(tag_ch__in = kw_ch)
        cujiansuo = sum([tag.tag_class for tag in tag_obj], [])
        kw_en = [tag.tag_en for tag in tag_obj] # store the keywords
        cujiansuo_res = sorted(set(cujiansuo), key=cujiansuo.index)
        qa_obj = QuestionAnswer.objects.filter(id__in=cujiansuo_res)
        print len(qa_obj)
        kw_en_len = len(kw_en)
        count_en = [0]*kw_en_len
        res_list = [] # the final list to return
        kw = kw_en
        for item in qa_obj:
            q = item.question.lower()
            a = item.answer.lower()
            for i in range(kw_en_len):
                k = kw_en[i]
                if k in q or k in a:
                    count_en[i] += 1
            if Is_rela(q, kw_en):
                item_t = [item.id, item.question, ret_em(kw, item.answer), item.answer]
                res_list.append(item_t)
        D = len(res_list)
        Idf = []
        if not D == 0:
            Idf = [abs(math.log(D/float(t+1))) for t in count_en]
        theta1 = 1.0
        theta2 = 1.0
        theta3 = 1.0
        mmax = 0.0
        an_b = None
        for item in res_list:
            ans_sen = nltk.sent_tokenize(item[3])
            en_a = sum([nltk.word_tokenize(t) for t in ans_sen],[])
            en_q =  nltk.word_tokenize(item[1])
            score_f = Feature(kw_ch, en_q, kw, en_a, Idf)
            score = theta1 * score_f.length_feature() + sum(map(lambda x: x * theta2, score_f.word_feature())) + sum(map(lambda x: x * theta3, score_f.tfidf()))
            if mmax < score:
                mmax = score
                an_b = item
        RET_ANS = res_list
        write_file(res_list, query)
        return JsonResponse(RET_ANS, safe=False)
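The ranking score buried in the long expression above is a three-term weighted sum over the answer's features. Restated as a small helper (a sketch; score_f is the Feature scorer built in the loop, and all three thetas are 1.0 in the view):

def answer_score(score_f, theta1=1.0, theta2=1.0, theta3=1.0):
    # length feature + weighted word features + weighted tf-idf features
    return (theta1 * score_f.length_feature()
            + theta2 * sum(score_f.word_feature())
            + theta3 * sum(score_f.tfidf()))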
Example #18
    def __init__(self, quantityName, zScale=1.0,
                 offset=0.0, **kwargs):
        '''
        Parameters:
        quantityName: string - name of a quantity
        zScale: float - multiply point z-values by this
        offset: float - add this to point z-values
        '''
        Feature.__init__(self, **kwargs)

        self.quantityName = quantityName
        self.zScale = zScale
        self.offset = offset
Example #19
    def __init__(self, field, word_file):
        """ Get the word list from the specified file.

        :param field: The field to which this feature belongs.
        :param word_file: The path to an alphabetized word list, one word per line.
        :return: None
        """
        word_file = field.settings.resolve_path(word_file)
        with open(word_file, 'r') as f:
            words = f.readlines()
        words = [w.strip() for w in words if w.islower()]
        self._dict_words = words
        Feature.__init__(self, field)
Example #20
def main():
    X = [[1, 2], [2, 3]]
    
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)

    model = OneHotEncoder(n_values=[5,8], sparse=True)
    model.fit(X)
    doWithOneHotEncoder(model, featureList)
    root.printTree()
Example #21
def main():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)

    model = PCA(n_components=1)
    model.fit(X)
    doWithPCA(model, featureList)
    root.printTree()
Example #22
    def msg2X(self, samples):
        '''
        Convert messages to data matrix format. 

        X: A dict. See explanation of _G()
        '''
        X = {}
        for m in samples.values():
            Feature.extract(m)
            x = []
            for name in self.feature_name:
                x.append(m.feature[name])
            X[m.msg_id] = x
        return X
Example #23
 def transform(self, fp):
     found_feature = False
     for f in fp:
         if f.name == self.name:
             yield Feature.apply_config(
                 f,
                 feature_type=FeatureType.TARGET
             )
             found_feature = True
         else:
             yield Feature.apply_config(
                 f,
                 feature_type=FeatureType.PREDICTOR
             )
     assert found_feature, "Feature `{}` is not found in the FeaturePool".format(self.name)
Example #24
def main():
    from sklearn.feature_selection import VarianceThreshold
    X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
    
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)

    model = VarianceThreshold()
    model.fit(X)
    doWithSelector(model, featureList)
    root.printTree()
Example #25
    def __init__(self):
        self.train_file = FILE_PATH + '/../data/conll.nonexp.train'
        self.test_file = FILE_PATH + '/../data/conll.nonexp.test'
        self.model_file = FILE_PATH + '/../data/conll.nonexp.model'
        self.predicted_file = FILE_PATH + '/../data/conll.nonexp.test.predicted'

        self.feat_handle = Feature()
Example #26
class Trainer(object):

    def __init__(self):
        super(Trainer, self).__init__()
        self.tokenizer = Tokenizer()
        self.feature = Feature()

    # Training data using the given text and class.
    def train(self, text, className):
        # increase class
        self.feature.increaseClass(className)
        # tokenize text
        tokens = self.tokenizer.tokenize(text)
        # increase token
        for token in tokens:
            self.feature.increaseToken(token, className)
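A usage sketch for this trainer (class names from the snippet; the training texts are made up for illustration):

trainer = Trainer()
trainer.train('cheap pills, buy now', 'spam')
trainer.train('meeting moved to noon', 'ham')
# trainer.feature now holds per-class counts for every token seen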
Example #27
 def feature(self, subtree):
     rv = Feature(**dict(subtree))
     
     if rv.feature_elements is None:
         rv.feature_elements = []
     
     if 'background' not in rv:
         rv.background = None
     
     # Assign background to feature elements.
     for sc in rv.feature_elements:
         sc.background = rv.background
     
     rv.tags = frozenset(rv.tags)
     
     return rv
Example #28
def Reset():
  """Reset the library. Useful for re-initializing to a different server."""
  data.reset()
  ApiFunction.reset()
  Image.reset()
  Feature.reset()
  Collection.reset()
  ImageCollection.reset()
  FeatureCollection.reset()
  Filter.reset()
  Geometry.reset()
  Number.reset()
  String.reset()
  _ResetGeneratedClasses()
  global Algorithms
  Algorithms = _AlgorithmsContainer()
Example #29
    def transform_single(self, f):
        st = feature_summary(f.data)

        return Feature.merge_instances(
            f,
            Feature(f.name, f.data, st)
        )
Example #30
def init_test_data(fname, topic):
  print ('[ init_test_data ] =================')
  QID = 1
  # amap , key : aid
  # value : attr[0] preference, attr[1] aid, attr[2] aname
  amap = filter_data(fname)
  fea = Feature(topic)
  train_rank = []
  for tid in amap:
    aid = int(tid)
    fv = fea.get_feature_vector(aid)
    print ('[ init_test_data ] %d get feature vector ok.' %(aid))
    train_rank.append((aid, reform_vector(fv), QID))
    #ZC.dump_cache()

  return train_rank
Example #31
def get_feature_by_feat(dict, feat):
    feat_dict = {}
    if feat in dict:
        feat_dict[dict[feat]] = 1
    return Feature("", len(dict), feat_dict)
Example #32
class Warp():
    def __init__(self, image, feat_file, grid_height, grid_width, grid_dir, warp_dir, alpha=1, margin=200):
        self.alpha = alpha
        self.feat = Feature() # feature object
        self.read_feature_points(feat_file, margin)
        self.grid = Grid(image, grid_height, grid_width, margin, grid_dir, warp_dir)
        self.image = image

        # these should not change after global warping; setting them initially is easier
        self.grid.compute_salience()
        self.set_grid_info_to_feat()

    def warp(self):
        self.GlobalWarp()
        self.ContentWarp()

    def GlobalWarp(self):
        # find the homography
        src = np.zeros((self.feat.size(), 2))
        dest = np.zeros((self.feat.size(), 2))
        for i, feat_info in enumerate(self.feat.feat):
            src[i][0] = feat_info.col
            src[i][1] = feat_info.row
            dest[i][0] = feat_info.dest_col
            dest[i][1] = feat_info.dest_row
        H, _ = cv2.findHomography(src, dest, cv2.RANSAC)

        # apply global transform
        self.grid.GlobalWarp(H)

        # features need to be transformed as well
        for i, feat_info in enumerate(self.feat.feat):
            p = np.array([feat_info.col, feat_info.row, 1])
            p_prime = np.dot(H, p)
            p_prime /= p_prime[-1]
            p_prime = p_prime[:-1].round().astype('int')
            self.feat.feat[i].set_global(p_prime[1], p_prime[0])

        print ('global warp finished. ', end='', flush=True)

    def ContentWarp(self):
        self.compute_bilinear_interpolation()
        self.grid.compute_u_v()
        self.build_linear_system_and_solve()
        self.image = self.image.split('/')[-1]
        self.grid.show_grid('after transform', self.feat.feat, show=False, save=True, image=self.image)
        self.map_texture(self.image)

    def build_linear_system_and_solve(self):
        '''
        A: [w1 0 w2 0 w3 0 w4 0 0 0 ... 0] X: [V_1x]
           [0 w1 0 w2 0 w3 0 w4 0 0 ... 0]    [V_1y]
                          .                   [V_2x]
                          .                   [V_2y]
                          .                   [V_3x]
               similarity transform          [V_3y]
                          .                   [V_4x]
                          .                   [V_4y]
                          .                     .
                          .                     .
                                                .
        '''
        # A*x = B
        v_map    = dict() # the map from Xi to mesh coordinates
        mesh_map = dict() # the map from mesh coordinates to Xi

        # construct map
        map_id = 0 # for every even i, x[i] and x[i+1] hold the row and col respectively
        for row in range(self.grid.global_mesh.shape[0]):
            for col in range(self.grid.global_mesh.shape[1]):
                v_map[map_id] = (row, col)
                mesh_map[(row, col)] = map_id
                map_id += 2

        # build the data term and the similarity-transform term
        A_simularity = np.zeros((self.grid.count()*16, 2*len(v_map)))
        B_simularity = np.zeros((self.grid.count()*16, 1))
        A_data = np.zeros((2*self.feat.size(), 2*len(v_map)))
        B_data = np.zeros((2*self.feat.size(), 1))
        for i, feat_info in enumerate(self.feat.feat):
            cell_row, cell_col = feat_info.grid_pos
            tl = feat_info.temporal_coeff

            # data term
            v1_x_pos = mesh_map[(cell_row  , cell_col  )]; v1_y_pos = v1_x_pos + 1
            v2_x_pos = mesh_map[(cell_row+1, cell_col  )]; v2_y_pos = v2_x_pos + 1
            v3_x_pos = mesh_map[(cell_row+1, cell_col+1)]; v3_y_pos = v3_x_pos + 1
            v4_x_pos = mesh_map[(cell_row  , cell_col+1)]; v4_y_pos = v4_x_pos + 1

            A_data[2*i][v1_x_pos] = tl * feat_info.interpolation_coeff[0] # V1's coeff for x coordinate
            A_data[2*i][v2_x_pos] = tl * feat_info.interpolation_coeff[1] # V2's coeff for x coordinate
            A_data[2*i][v3_x_pos] = tl * feat_info.interpolation_coeff[2] # V3's coeff for x coordinate
            A_data[2*i][v4_x_pos] = tl * feat_info.interpolation_coeff[3] # V4's coeff for x coordinate

            A_data[2*i+1][v1_y_pos] = tl * feat_info.interpolation_coeff[0] # V1's coeff for y coordinate
            A_data[2*i+1][v2_y_pos] = tl * feat_info.interpolation_coeff[1] # V2's coeff for y coordinate
            A_data[2*i+1][v3_y_pos] = tl * feat_info.interpolation_coeff[2] # V3's coeff for y coordinate
            A_data[2*i+1][v4_y_pos] = tl * feat_info.interpolation_coeff[3] # V4's coeff for y coordinate

            B_data[2*i]   = tl * ( np.array(feat_info.dest_row) + 0.5) # to grid coordinate
            B_data[2*i+1] = tl * ( np.array(feat_info.dest_col) + 0.5) # to grid coordinate

        # similarity transform term for every grid cell
        for cell_row in range(self.grid.g_height):
            for cell_col in range(self.grid.g_width):
                Ws = self.grid.gridCell[cell_row][cell_col].salience

                v1_x_pos = mesh_map[(cell_row  , cell_col  )]; v1_y_pos = v1_x_pos + 1
                v2_x_pos = mesh_map[(cell_row+1, cell_col  )]; v2_y_pos = v2_x_pos + 1
                v3_x_pos = mesh_map[(cell_row+1, cell_col+1)]; v3_y_pos = v3_x_pos + 1
                v4_x_pos = mesh_map[(cell_row  , cell_col+1)]; v4_y_pos = v4_x_pos + 1

                index_offset = cell_row * self.grid.g_height + cell_col
                index_offset *= 16

                # first triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[0][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[0][1]
                A_simularity[index_offset+0][v1_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+0][v2_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+0][v2_y_pos] = Ws * ( v )
                A_simularity[index_offset+0][v3_x_pos] = Ws * ( -u )
                A_simularity[index_offset+0][v3_y_pos] = Ws * ( -v )
                B_simularity[index_offset+0] = 0
                A_simularity[index_offset+1][v1_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+1][v2_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+1][v2_x_pos] = Ws * ( -v )
                A_simularity[index_offset+1][v3_y_pos] = Ws * ( -u )
                A_simularity[index_offset+1][v3_x_pos] = Ws * ( v )
                B_simularity[index_offset+1] = 0

                # second triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[1][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[1][1]
                A_simularity[index_offset+2][v1_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+2][v4_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+2][v4_y_pos] = Ws * ( v )
                A_simularity[index_offset+2][v3_x_pos] = Ws * ( -u )
                A_simularity[index_offset+2][v3_y_pos] = Ws * ( -v )
                B_simularity[index_offset+2] = 0
                A_simularity[index_offset+3][v1_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+3][v4_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+3][v4_x_pos] = Ws * ( -v )
                A_simularity[index_offset+3][v3_y_pos] = Ws * ( -u )
                A_simularity[index_offset+3][v3_x_pos] = Ws * ( v )
                B_simularity[index_offset+3] = 0

                # third triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[2][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[2][1]
                A_simularity[index_offset+4][v2_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+4][v3_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+4][v3_y_pos] = Ws * ( v )
                A_simularity[index_offset+4][v4_x_pos] = Ws * ( -u )
                A_simularity[index_offset+4][v4_y_pos] = Ws * ( -v )
                B_simularity[index_offset+4] = 0
                A_simularity[index_offset+5][v2_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+5][v3_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+5][v3_x_pos] = Ws * ( -v )
                A_simularity[index_offset+5][v4_y_pos] = Ws * ( -u )
                A_simularity[index_offset+5][v4_x_pos] = Ws * ( v )
                B_simularity[index_offset+5] = 0

                # fourth triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[3][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[3][1]
                A_simularity[index_offset+6][v2_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+6][v1_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+6][v1_y_pos] = Ws * ( v )
                A_simularity[index_offset+6][v4_x_pos] = Ws * ( -u )
                A_simularity[index_offset+6][v4_y_pos] = Ws * ( -v )
                B_simularity[index_offset+6] = 0
                A_simularity[index_offset+7][v2_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+7][v1_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+7][v1_x_pos] = Ws * ( -v )
                A_simularity[index_offset+7][v4_y_pos] = Ws * ( -u )
                A_simularity[index_offset+7][v4_x_pos] = Ws * ( v )
                B_simularity[index_offset+7] = 0

                # fifth triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[4][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[4][1]
                A_simularity[index_offset+8][v3_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+8][v4_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+8][v4_y_pos] = Ws * ( v )
                A_simularity[index_offset+8][v1_x_pos] = Ws * ( -u )
                A_simularity[index_offset+8][v1_y_pos] = Ws * ( -v )
                B_simularity[index_offset+8] = 0
                A_simularity[index_offset+9][v3_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+9][v4_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+9][v4_x_pos] = Ws * ( -v )
                A_simularity[index_offset+9][v1_y_pos] = Ws * ( -u )
                A_simularity[index_offset+9][v1_x_pos] = Ws * ( v )
                B_simularity[index_offset+9] = 0

                # sixth triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[5][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[5][1]
                A_simularity[index_offset+10][v3_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+10][v2_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+10][v2_y_pos] = Ws * ( v )
                A_simularity[index_offset+10][v1_x_pos] = Ws * ( -u )
                A_simularity[index_offset+10][v1_y_pos] = Ws * ( -v )
                B_simularity[index_offset+10] = 0
                A_simularity[index_offset+11][v3_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+11][v2_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+11][v2_x_pos] = Ws * ( -v )
                A_simularity[index_offset+11][v1_y_pos] = Ws * ( -u )
                A_simularity[index_offset+11][v1_x_pos] = Ws * ( v )
                B_simularity[index_offset+11] = 0

                # seventh triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[6][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[6][1]
                A_simularity[index_offset+12][v4_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+12][v1_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+12][v1_y_pos] = Ws * ( v )
                A_simularity[index_offset+12][v2_x_pos] = Ws * ( -u )
                A_simularity[index_offset+12][v2_y_pos] = Ws * ( -v )
                B_simularity[index_offset+12] = 0
                A_simularity[index_offset+13][v4_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+13][v1_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+13][v1_x_pos] = Ws * ( -v )
                A_simularity[index_offset+13][v2_y_pos] = Ws * ( -u )
                A_simularity[index_offset+13][v2_x_pos] = Ws * ( v )
                B_simularity[index_offset+13] = 0

                # eighth triangle
                u  = self.grid.gridCell[cell_row][cell_col].u_v[7][0]
                v  = self.grid.gridCell[cell_row][cell_col].u_v[7][1]
                A_simularity[index_offset+14][v4_x_pos] = Ws * ( 1 )
                A_simularity[index_offset+14][v3_x_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+14][v3_y_pos] = Ws * ( v )
                A_simularity[index_offset+14][v2_x_pos] = Ws * ( -u )
                A_simularity[index_offset+14][v2_y_pos] = Ws * ( -v )
                B_simularity[index_offset+14] = 0
                A_simularity[index_offset+15][v4_y_pos] = Ws * ( 1 )
                A_simularity[index_offset+15][v3_y_pos] = Ws * ( u - 1 )
                A_simularity[index_offset+15][v3_x_pos] = Ws * ( -v )
                A_simularity[index_offset+15][v2_y_pos] = Ws * ( -u )
                A_simularity[index_offset+15][v2_x_pos] = Ws * ( v )
                B_simularity[index_offset+15] = 0

        A_simularity *= self.alpha
        B_simularity *= self.alpha

        A = np.vstack((A_data, A_simularity[1:]))
        B = np.vstack((B_data, B_simularity[1:]))

        X, _, _, _ = np.linalg.lstsq(A, B, rcond=None)

        # round the solution
        X = np.array([ round(x) for x in X.reshape(-1) ]).reshape((-1, 1))

        # apply the result
        for i in range(X.shape[0]):
            if i % 2 != 0: continue
            mesh_row, mesh_col = v_map[i]
            self.grid.warpped_mesh[mesh_row][mesh_col] = np.array([X[i][0], X[i+1][0]])

        for cell_row in range(self.grid.g_height):
            for cell_col in range(self.grid.g_width):
                v1 = self.grid.warpped_mesh[cell_row  ][cell_col  ]
                v2 = self.grid.warpped_mesh[cell_row+1][cell_col  ]
                v3 = self.grid.warpped_mesh[cell_row+1][cell_col+1]
                v4 = self.grid.warpped_mesh[cell_row  ][cell_col+1]
                self.grid.gridCell[cell_row][cell_col].set_corners(v1, v2, v3, v4)
        print ('local warp finished.')
        return

    def map_texture(self, image):
        self.grid.map_texture(image)

    def compute_bilinear_interpolation(self):
        for i, feat_info in enumerate(self.feat.feat):
            corresponding_cell = self.grid.gridCell[feat_info.grid_pos[0]][feat_info.grid_pos[1]]
            self.feat.set_coefficients(i, corresponding_cell.compute_coeff(feat_info.global_pos))

    def read_feature_points(self, filename, margin):
        self.feat.read(filename, margin)

    def set_grid_info_to_feat(self):
        for i, feat_info in enumerate(self.feat.feat):
            self.feat.set_grid_position(i, self.grid.FeatToCellCoor(feat_info.pos))
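build_linear_system_and_solve stacks the data term and the alpha-scaled similarity term into one overdetermined system and solves it in the least-squares sense. The same solve in isolation, with tiny illustrative matrices:

import numpy as np

A_data = np.array([[1.0, 0.0], [0.0, 1.0]])  # hit the target point (3, 4)
B_data = np.array([[3.0], [4.0]])
A_sim = np.array([[1.0, -1.0]])              # soft constraint: x == y
B_sim = np.array([[0.0]])
alpha = 1.0

A = np.vstack((A_data, alpha * A_sim))
B = np.vstack((B_data, alpha * B_sim))
X = np.linalg.lstsq(A, B, rcond=None)[0]
print(X.ravel())  # trades off reaching (3, 4) against keeping x close to y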
Example #33
def get_feature_by_list(list):
    feat_dict = {}
    for index, item in enumerate(list):
        if item != 0:
            feat_dict[index+1] = item
    return Feature("", len(list), feat_dict)
Example #34
 class Numbers(BaseModel, self.Settings):
     stream = Feature(NumberStream, store=False)
     add1 = Feature(Add, needs=stream, store=False, rhs=1)
     add2 = Feature(Add, needs=stream, store=False, rhs=1)
     sumup = Feature(SumUp, needs=(add1, add2), store=True)
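Example #34 and the similar class snippets below (#38-39, #43, #45, #47, #49-53, #55-56) all use the same declarative pattern: Feature(Processor, needs=upstream, store=flag) registers a node in a processing graph on the model class, and only store=True nodes are persisted. A minimal sketch of how such a graph evaluates, in plain Python independent of the library:

def evaluate(name, graph, cache):
    # graph: name -> (func, [names of dependencies])
    if name not in cache:
        func, needs = graph[name]
        cache[name] = func(*(evaluate(n, graph, cache) for n in needs))
    return cache[name]

graph = {
    'stream': (lambda: [1, 2, 3], []),
    'add1': (lambda xs: [x + 1 for x in xs], ['stream']),
    'add2': (lambda xs: [x + 1 for x in xs], ['stream']),
    'sumup': (lambda a, b: [x + y for x, y in zip(a, b)], ['add1', 'add2']),
}
print(evaluate('sumup', graph, {}))  # [4, 6, 8]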
Example #35
 def feature(self, ref, path):
     '''Returns a Feature object corresponding to the passed ref and path'''
     return Feature(self, ref, path)
Example #36
class NavDataset(data.Dataset):
    def __init__(self, json_dirs, tok, img_path, panoramic, args):

        # read all json files and create a list of query data
        self.json_dirs = json_dirs  #  a list of json files
        self.tok = tok  # should be a lang, vision, action aware tokenizer ['VCLS', 'ACLS']
        self.mask_index = tok._convert_token_to_id(tok.mask_token)
        self.feature_store = Feature(img_path, panoramic)
        self.args = args

        self.data = []
        self.instr_refer = dict()  # instr_id : instr_encoding
        for json_dir in self.json_dirs:
            with open(json_dir) as f:
                current_trajs = json.load(f)
                for traj in current_trajs:
                    self.data += self.disentangle_path(traj)

    def __getitem__(self, index):
        # must return the data/label pair as tensors
        query = self.data[index]
        output = self.getQuery(query)
        return {key: torch.tensor(value) for key, value in output.items()}

    def __len__(self):
        return len(self.data)

    def disentangle_path(self, traj):
        query = list()
        instr_id = traj['instr_id']
        instruction = traj['instr_encoding']
        self.instr_refer[instr_id] = instruction

        path = traj['path']
        actions = traj['teacher_actions']
        action_emds = traj['teacher_action_emd']
        for t in range(len(path)):
            scan = path[t][0]
            viewpoint = path[t][1]
            viewIndex = path[t][2]
            teacher_action = actions[t]
            absViewIndex, rel_heading, rel_elevation = action_emds[t]

            current_query = SingleQuery(instr_id, scan, viewpoint, viewIndex,
                                        teacher_action, absViewIndex,
                                        rel_heading, rel_elevation)
            if t <= len(path) - 2:
                next_scan = path[t + 1][0]
                next_viewpoint = path[t + 1][1]
                next_viewIndex = path[t + 1][2]
                next_teacher_action = actions[t + 1]
                next_absViewIndex, next_rel_heading, next_rel_elevation = action_emds[
                    t + 1]
                next_query = SingleQuery(instr_id, next_scan, next_viewpoint,
                                         next_viewIndex, next_teacher_action,
                                         next_absViewIndex, next_rel_heading,
                                         next_rel_elevation)
            else:
                next_query = current_query

            current_query.next = next_query
            query.append(current_query)  # a list of (SASA)

        return query

    def getQuery(self, query):
        # prepare text tensor
        output = dict()
        text_seq = torch.LongTensor(self.instr_refer[query.instr_id])
        masked_text_seq, masked_text_label, attention_mask = mask_tokens(
            text_seq, self.tok, self.args)
        output['masked_text_seq'] = masked_text_seq
        output['masked_text_label'] = masked_text_label
        output['lang_attention_mask'] = attention_mask

        # prepare vision tensor
        scan, viewpoint, viewindex = query.scan, query.viewpoint, query.viewIndex
        feature_all, feature_1 = self.feature_store.rollout(
            scan, viewpoint, viewindex)
        feature_with_loc_all = np.concatenate(
            (feature_all, _static_loc_embeddings[viewindex]), axis=-1)
        output['feature_all'] = feature_with_loc_all

        # prepare action
        if query.absViewIndex == -1:
            teacher_action_embedding = np.zeros(feature_all.shape[-1] + 128,
                                                np.float32)
        else:
            teacher_view = feature_all[query.absViewIndex, :]
            loc_embedding = np.zeros(128, np.float32)
            loc_embedding[0:32] = np.sin(query.rel_heading)
            loc_embedding[32:64] = np.cos(query.rel_heading)
            loc_embedding[64:96] = np.sin(query.rel_elevation)
            loc_embedding[96:] = np.cos(query.rel_elevation)
            teacher_action_embedding = np.concatenate(
                (teacher_view, loc_embedding))
        output['teacher'] = query.teacher_action
        output['teacher_embedding'] = teacher_action_embedding

        # prepare next step info
        nscan, nviewpoint, nviewindex = query.next.scan, query.next.viewpoint, query.next.viewIndex
        nfeature_all, nfeature_1 = self.feature_store.rollout(
            nscan, nviewpoint, nviewindex)
        nfeature_with_loc_all = np.concatenate(
            (nfeature_all, _static_loc_embeddings[nviewindex]), axis=-1)
        output['next_feature_all'] = nfeature_with_loc_all

        if query.next.absViewIndex == -1:
            nteacher_action_embedding = np.zeros(feature_all.shape[-1] + 128,
                                                 np.float32)
        else:
            nteacher_view = nfeature_all[query.next.absViewIndex, :]
            nloc_embedding = np.zeros(128, np.float32)
            nloc_embedding[0:32] = np.sin(query.next.rel_heading)
            nloc_embedding[32:64] = np.cos(query.next.rel_heading)
            nloc_embedding[64:96] = np.sin(query.next.rel_elevation)
            nloc_embedding[96:] = np.cos(query.next.rel_elevation)
            nteacher_action_embedding = np.concatenate(
                (nteacher_view, nloc_embedding))
        output['next_teacher'] = query.next.teacher_action
        output['next_teacher_embedding'] = nteacher_action_embedding

        # prepare random next step info
        prob = np.random.random()
        if prob <= 0.5:
            output['isnext'] = 1
            output['next_img'] = output['next_feature_all']
        else:
            output['isnext'] = 0
            candidates = list(range(36))
            candidates.remove(nviewindex)
            fake_nviewindex = np.random.choice(candidates)
            ffeature_all, ffeature_1 = self.feature_store.rollout(
                nscan, nviewpoint, fake_nviewindex)
            ffeature_with_loc_all = np.concatenate(
                (ffeature_all, _static_loc_embeddings[fake_nviewindex]),
                axis=-1)
            output['next_img'] = ffeature_with_loc_all

        return output

    def random_word(self, text_seq):
        tokens = text_seq.copy()  # already of the form [cls t1 t2 sep]
        output_label = []

        for i, token in enumerate(tokens):
            if i == 0 or i == len(tokens) - 1:
                output_label.append(0)
                continue
            prob = np.random.random()
            if prob < 0.15:
                prob /= 0.15

                output_label.append(tokens[i])

                # 80% randomly change token to mask token
                if prob < 0.8:
                    tokens[i] = self.mask_index

                # 10% randomly change token to random token
                elif prob < 0.9:
                    tokens[i] = random.randrange(len(self.tok))

                # 10% randomly change token to current token
                else:
                    tokens[i] = tokens[i]  # just keep it

            else:
                tokens[i] = tokens[i]  # just keep it
                output_label.append(0)

        return tokens, output_label
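random_word implements the standard BERT-style masking policy: 15% of the non-boundary tokens become prediction targets, and of those, 80% are replaced with the mask token, 10% with a random token, and 10% are left unchanged. The branching, isolated for a single draw in [0, 1):

def mask_action(prob):
    # mirrors the branching in random_word for one token
    if prob >= 0.15:
        return 'keep (not predicted)'
    prob /= 0.15
    if prob < 0.8:
        return 'replace with mask token'       # 80% of selected tokens
    if prob < 0.9:
        return 'replace with a random token'   # 10% of selected tokens
    return 'keep unchanged (still predicted)'  # remaining 10%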
Example #37
from application import App
from feature import Feature

apps = [
    App('hyper', 'https://releases.hyper.is/download/win'),
    App('git',
        'https://central.github.com/deployments/desktop/desktop/latest/win32'),
    App('atom', 'https://atom.io/download/windows_x64'),
    App('python3.6.2',
        'https://www.python.org/ftp/python/3.6.2/python-3.6.2.exe',
        '/quiet PrependPath=1'),
    Feature('Microsoft-Windows-Subsystem-Linux')
]


def main():
    for i in range(len(apps)):
        if apps[i].download:
            apps[i].download()
    for i in range(len(apps)):
        apps[i].install()


if __name__ == '__main__':
    main()
Example #38
 class Timestamps(BaseModel, self.Settings):
     stream = Feature(TextStream, store=True)
     t1 = Feature(Timestamp, needs=stream, store=True)
     t2 = Feature(Timestamp, needs=stream, store=False)
     cat = Feature( \
             Concatenate, needs=[t1, t2], store=False)
Example #39
 class D1(BaseModel, self.Settings):
     stream = Feature(TextStream, store=True)
     words = Feature(Tokenizer, needs=stream, store=False)
Example #40
 def __init__(self, column):
     Feature.__init__(self)
     self.column = column
Example #41
 def __init__(self, field, reverse=False):
     self.reverse = reverse
     Feature.__init__(self, field)
Example #42
 def __init__(self, field, string):
     self._string = string
     Feature.__init__(self, field)
Example #43
 class D2(D1):
     words = Feature(Tokenizer, needs=D1.stream, store=True)
Example #44
def get_feature_by_feat_list(dict, feat_list):
    feat_dict = {}
    for feat in feat_list:
        if feat in dict:
            feat_dict[dict[feat]] = 1
    return Feature("", len(dict), feat_dict)
Example #45
 class D(BaseModel, self.Settings):
     stream = Feature(TextStream, store=True)
     opposite = Feature(Contrarion, needs=stream, store=True)
Example #46
class ConvTextClassification(object):
    
    def __init__(self, args):
        self.args = args
        self.data_reader = Data_reader()
        self.raw_train_data = self.data_reader.read_train_data()
        self.raw_test_data = self.data_reader.read_test_data()

        self.feature = Feature(args)
        self.train_data = []
        self.labels = []
        self.val_data = []
        self.val_labels = []
        self.test_data = []

    def process_data(self):
        self.train_data, self.labels = self.feature.extract_feature(self.raw_train_data)
        self.test_data = self.feature.extract_test_feature(self.raw_test_data)

    def partition_data(self):
        
        num = len(self.train_data)
        partition_point = num - int(num / 10.0)
        #print self.labels

        self.val_data = self.train_data[partition_point:]
        self.val_labels = self.labels[partition_point:]
        self.train_data = self.train_data[:partition_point]
        self.labels = self.labels[:partition_point]


    def _loss(self, logits, L2_loss, labels):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels, name='aaa')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='ppp')
        return cross_entropy_mean 

    def _forward(self, batch_x):
        layers = []
        layers.append(tfnnutils.InputLayer())
        layers.append(tfnnutils.Conv2D('conv1', ksize=(self.args.feature, 7), kernels=1)) 
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Conv2D('conv2', ksize=(self.args.feature, 7), kernels=1)) 
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Conv2D('conv3', ksize=(self.args.feature, 3), kernels=1)) 
        layers.append(tfnnutils.Conv2D('conv4', ksize=(self.args.feature, 3), kernels=1)) 
        layers.append(tfnnutils.Conv2D('conv5', ksize=(self.args.feature, 3), kernels=1)) 
        layers.append(tfnnutils.Conv2D('conv6', ksize=(self.args.feature, 3), kernels=1)) 
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Flatten())
        layers.append(tfnnutils.FCLayer('FC1', 1024, act = tf.nn.relu))
        layers.append(tfnnutils.FCLayer('FC2', 1024, act = tf.nn.relu))
        layers.append(tfnnutils.FCLayer('FC3', 2, act = tf.nn.relu))

        L2_loss = 0.
        last_layer = None
        for i, layer in enumerate(layers):
            if hasattr(layer, 'L2_Loss'):
                L2_loss += layer.L2_Loss
            batch_x = layer.forward(last_layer, batch_x)
            last_layer = layer

        pred = tf.nn.softmax(batch_x)

        return pred, batch_x, L2_loss

    def build_model(self):
        global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0), trainable=False)
        self.lr = tf.placeholder(tf.float32, shape=[])
        opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
        self._x = tf.placeholder(tf.float32, shape=[self.args.BatchSize, self.args.feature, self.args.length, 1])
        self._y = tf.placeholder(tf.int32)
        x = self._x
        y = self._y

        pred, logits, L2_loss = self._forward(x)
        loss = self._loss(logits, L2_loss, y)

        grads = opt.compute_gradients(loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer() 

        self.sess = tf.Session()

        self.sess.run(init)

        self.train_step = apply_gradient_op
        self.pred_step  = pred
        self.loss_step  = loss

        if self.args.test == 1 and self.args.load_model != '':
            print 'Restore the model from %s' % self.args.load_model
            saver = tf.train.Saver()
            saver.restore(self.sess, self.args.load_model)
            print 'Finish restoring the model'


    def get_batch(self, dataset, labels, index):
        #print 'start getting a batch'
        st = index * self.args.BatchSize
        ed = st + self.args.BatchSize
        #print '---------'
        #print len(dataset)
        if ed >= len(dataset):
            return None, None
        ret_x = np.zeros((self.args.BatchSize, self.args.feature, self.args.length), np.float32)
        ret_y = np.zeros((self.args.BatchSize, ), np.int32)
        ret_x = np.array(dataset[st:ed])
        ret_y = np.array(labels[st:ed])
        #for i in xrange(st, ed):
        #    print type(dataset[i]['content'])
        #    ret_x[i] = np.array(dataset[i]['content'])
        #    ret_y[i] = np.array(self.labels[i])

        ret_x = ret_x.reshape(self.args.BatchSize, self.args.feature, self.args.length, 1)
       
        return ret_x, ret_y

    def get_batch_predict(self, dataset, index):
        st = index * self.args.BatchSize
        ed = st + self.args.BatchSize

        if ed >= len(dataset):
            return None

        ret_x = np.zeros((self.args.BatchSize, self.args.feature, self.args.length), np.float32)
        ret_x = np.array(dataset[st:ed])
        ret_y = np.zeros((self.args.BatchSize, ), np.int32)

        ret_x = ret_x.reshape(self.args.BatchSize, self.args.feature, self.args.length, 1)

        return ret_x, ret_y

    def evaluate(self, dataset, labels):
        batch_size = self.args.BatchSize
        total_loss = 0.
        total_err = 0.
        n_batch = 0
        now_pos = 0
        print 'start evaluating'
        while True:
            prepared_x, prepared_y = self.get_batch(dataset, labels, n_batch)
            if prepared_x is None:
                break
            feed = {self._x: prepared_x, self._y: prepared_y}
            loss, preds = self.sess.run([self.loss_step, self.pred_step], feed_dict=feed)
            #print prepared_y[:10]
            #print preds[:10]
            total_loss += np.mean(loss)
            for i in xrange(len(preds)):
                if np.argmax(preds[i]) != prepared_y[i]:
                    total_err += 1

            n_batch += 1
            if n_batch > 10:
                break

        loss = total_loss / n_batch
        err = total_err / (n_batch * batch_size)

        print 'evaluate: loss = %f err = %f' % (loss, err)
        
        return loss, err

    def predict(self, dataset):
        batch_size = self.args.BatchSize
        predictions = []
        n_batch = 0
        now_pos = 0
        print 'starting predicting the test dataset'
        while True:
            prepared_x, prepared_y = self.get_batch_predict(dataset, n_batch)
            if prepared_x is None:
                break
            feed = {self._x: prepared_x, self._y: prepared_y}
            _, preds = self.sess.run([self.loss_step, self.pred_step], feed_dict=feed)
            predictions.extend(preds)

            n_batch += 1

        return predictions



    def save(self, dirname):
        try:
            os.makedirs(dirname)
        except:
            pass
        saver = tf.train.Saver()
        return saver.save(self.sess, os.path.join(dirname, "model1.ckpt"))
   
    def test(self):
        print 'starting test'
        predictions = self.predict(self.test_data)
        with open('ans', 'w') as f:
            for item in predictions:
                try:
                    f.write(item[1])
                except:
                    print item

    def train(self):
        lr = self.args.lr
        best_acc = 0.0
        for epoch in xrange(self.args.num_epoch):
            n_train_batch = 0
            print n_train_batch
            batch_size = self.args.BatchSize
            if epoch > 0  and epoch % 3 == 0:
                lr /= 2.0
            while True:
                prepared_x, prepared_y = self.get_batch(self.train_data, self.labels, n_train_batch)
                if prepared_x is None:
                    print 'miemiemie'
                    break
                feed = {self.lr: lr, self._x: prepared_x, self._y: prepared_y}
                _, loss = self.sess.run([self.train_step, self.loss_step], feed_dict=feed)
                if n_train_batch % 100 == 0:
                    print 'The iteration is %d train loss is: %f' % (n_train_batch, loss)
                if n_train_batch % 1000 == 0:
                    self.evaluate(self.val_data, self.val_labels)

                n_train_batch += 1
        print 'start saving the model'
        self.save(args.save_model)
        print 'finish saving the model'
Example #47
 class D1(BaseModel):
     stream = Feature(TextStream, store=True)
Example #48
if __name__ == "__main__":
    data_path = "data/20_newsgroups"
    corpus = Corpus()
    corpus.load_index(data_path)
    # generate tokens; uncomment when they need to be regenerated
    # corpus.gen_tokens(gnosisTokenizer())
    # calc features
    corpus_tokens = []
    corpus_labels = []
    for category in corpus.category_list:
        content = Tokenizer.load_category(category)
        if content:
            corpus_tokens.extend(content)
            corpus_labels.extend([corpus.category_list.index(category)] *
                                 len(content))
    feature = Feature()
    feature.make_vsm(corpus_tokens)
    # feature.print_vsm()
    # reduce features; k == 0 means auto-detect
    # feature.reducex(corpus_labels, cate_list=corpus.category_list)
    feature.reduce_feature(corpus_labels, k=0)
    feature_id = "feature.txt"
    feature.store(feature_id)

    # classify
    # lib svm
    classifier = LibSvmClassifier(feature_id)
    y_actual, y_predict = classifier.do_classify()
    Classifier.predict_info("Lib SVM", y_actual, y_predict)
    #  sklearn svm
    classifier = SvmClassifier(feature.feature_vec, feature.feature_label)
Example #49
class Doc4(BaseModel):
    stream = Feature(TextStream, chunksize=10, store=False)
    smaller = Feature(TextStream, needs=stream, chunksize=3, store=True)
Example #50
 class Document(BaseModel, self.Settings):
     stream = Feature(TextStream, store=True)
     dam = Feature(Dam, needs=stream, store=False)
     words = Feature(Tokenizer, needs=dam, store=False)
     count = JSONFeature(WordCount, needs=words, store=False)
Example #51
 class D1(BaseModel):
     stream = Feature(TextStream, store=False)
     echo = Feature(Echo, needs=stream, store=True)
Example #52
 class D(BaseModel, self.Settings):
     stream = Feature(TextStream, store=True)
     copy = Feature(Counter, needs=stream, store=False)
     words = Feature(Tokenizer, needs=copy, store=False)
     count = JSONFeature(WordCount, needs=words, store=False)
Example #53
class MultipleRoots(BaseModel):
    stream1 = Feature(TextStream, chunksize=3, store=False)
    stream2 = Feature(TextStream, chunksize=3, store=False)
    cat = Feature(EagerConcatenate, needs=[stream1, stream2], store=True)
Example #54
def _Promote(arg, klass):
    """Wrap an argument in an object of the specified class.

  This is used to, e.g., promote numbers or strings to Images and arrays
  to Collections.

  Args:
    arg: The object to promote.
    klass: The expected type.

  Returns:
    The argument promoted if the class is recognized, otherwise the
    original argument.
  """
    if arg is None:
        return arg

    if klass == 'Image':
        return Image(arg)
    elif klass == 'Feature':
        if isinstance(arg, Collection):
            # TODO(user): Decide whether we want to leave this in. It can be
            #              quite dangerous on large collections.
            return ApiFunction.call_(
                'Feature', ApiFunction.call_('Collection.geometry', arg))
        else:
            return Feature(arg)
    elif klass == 'Element':
        if isinstance(arg, Element):
            # Already an Element.
            return arg
        elif isinstance(arg, Geometry):
            # Geometries get promoted to Features.
            return Feature(arg)
        elif isinstance(arg, ComputedObject):
            # Try a cast.
            return Element(arg.func, arg.args, arg.varName)
        else:
            # No way to convert.
            raise EEException('Cannot convert %s to Element.' % arg)
    elif klass == 'Geometry':
        if isinstance(arg, Collection):
            return ApiFunction.call_('Collection.geometry', arg)
        else:
            return Geometry(arg)
    elif klass in ('FeatureCollection', 'Collection'):
        # For now Collection is synonymous with FeatureCollection.
        if isinstance(arg, Collection):
            return arg
        else:
            return FeatureCollection(arg)
    elif klass == 'ImageCollection':
        return ImageCollection(arg)
    elif klass == 'Filter':
        return Filter(arg)
    elif klass == 'Algorithm':
        if isinstance(arg, basestring):
            # An API function name.
            return ApiFunction.lookup(arg)
        elif callable(arg):
            # A native function that needs to be wrapped.
            args_count = len(inspect.getargspec(arg).args)
            return CustomFunction.create(arg, 'Object',
                                         ['Object'] * args_count)
        elif isinstance(arg, Encodable):
            # An ee.Function or a computed function like the return value of
            # Image.parseExpression().
            return arg
        else:
            raise EEException('Argument is not a function: %s' % arg)
    elif klass == 'Dictionary':
        if isinstance(arg, dict):
            return arg
        else:
            return Dictionary(arg)
    elif klass == 'String':
        if (types.isString(arg) or isinstance(arg, ComputedObject)
                or isinstance(arg, String)):
            return String(arg)
        else:
            return arg
    elif klass == 'List':
        return List(arg)
    elif klass in ('Number', 'Float', 'Long', 'Integer', 'Short', 'Byte'):
        return Number(arg)
    elif klass in globals():
        cls = globals()[klass]
        ctor = ApiFunction.lookupInternal(klass)
        # Handle dynamically created classes.
        if isinstance(arg, cls):
            # Return unchanged.
            return arg
        elif ctor:
            # The client-side constructor will call the server-side constructor.
            return cls(arg)
        elif isinstance(arg, basestring):
            if hasattr(cls, arg):
                # arg is the name of a method in klass.
                return getattr(cls, arg)()
            else:
                raise EEException('Unknown algorithm: %s.%s' % (klass, arg))
        else:
            # Client-side cast.
            return cls(arg)
    else:
        return arg
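To illustrate the dispatch above, a few hedged calls and the results the branches imply (assuming the usual ee classes are in scope; nothing here goes beyond the code itself):

# Illustrative only; each result follows from one branch of _Promote above.
img = _Promote(1, 'Image')                    # number wrapped as an Image
fc = _Promote('users/x/table', 'Collection')  # string becomes a FeatureCollection
num = _Promote(3.5, 'Float')                  # numeric klasses all map to Number
raw = _Promote({'a': 1}, 'Dictionary')        # a plain dict passes through unchanged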
Example No. 55
class D2(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    words = Feature(Tokenizer, needs=stream, store=False)
    count = JSONFeature(WordCount, needs=words, store=True)
    aggregate = JSONFeature(WordCountAggregator, needs=count,
                            store=True)
Example No. 56
class Split(BaseModel, self.Settings):
    stream = Feature(TextStream, store=False)
    uppercase = Feature(ToUpper, needs=stream, store=True)
    lowercase = Feature(ToLower, needs=stream, store=True)
    cat = Feature(
            Concatenate, needs=[uppercase, lowercase], store=False)
Example No. 57
def _Promote(arg, klass):
  """Wrap an argument in an object of the specified class.

  This is used, for example, to promote numbers or strings to Images and
  arrays to Collections.

  Args:
    arg: The object to promote.
    klass: The expected type.

  Returns:
    The argument promoted if the class is recognized, otherwise the
    original argument.
  """
  if arg is None:
    return arg

  if klass == 'Image':
    return Image(arg)
  elif klass == 'Feature':
    if isinstance(arg, Collection):
      # TODO(user): Decide whether we want to leave this in. It can be
      #              quite dangerous on large collections.
      return ApiFunction.call_(
          'Feature', ApiFunction.call_('Collection.geometry', arg))
    else:
      return Feature(arg)
  elif klass in ('Element', 'EEObject'):
    # TODO(user): Remove EEObject once the server is updated.
    if isinstance(arg, Element):
      # Already an EEObject.
      return arg
    elif isinstance(arg, ComputedObject):
      # Try a cast.
      return Element(arg.func, arg.args)
    else:
      # No way to convert.
      raise EEException('Cannot convert %s to Element.' % arg)
  elif klass == 'Geometry':
    if isinstance(arg, Collection):
      return ApiFunction.call_('Collection.geometry', arg)
    else:
      return Geometry(arg)
  elif klass in ('FeatureCollection', 'Collection'):
    # For now Collection is synonymous with FeatureCollection.
    if isinstance(arg, Collection):
      return arg
    else:
      return FeatureCollection(arg)
  elif klass == 'ImageCollection':
    return ImageCollection(arg)
  elif klass == 'Filter':
    return Filter(arg)
  elif klass == 'Algorithm' and isinstance(arg, basestring):
    return ApiFunction.lookup(arg)
  elif klass == 'Date':
    if isinstance(arg, basestring):
      try:
        import dateutil.parser    # pylint: disable=g-import-not-at-top
      except ImportError:
        raise EEException(
            'Conversion of strings to dates requires the dateutil library.')
      else:
        return dateutil.parser.parse(arg)
    elif isinstance(arg, numbers.Number):
      return datetime.datetime.fromtimestamp(arg / 1000)
    elif isinstance(arg, ComputedObject):
      # Bypass promotion of this and do it directly.
      func = ApiFunction.lookup('Date')
      return ComputedObject(func, func.promoteArgs(func.nameArgs([arg])))
    else:
      return arg
  elif klass == 'Dictionary':
    if klass not in globals():
      # No dictionary class defined.
      return arg
    cls = globals()[klass]
    if isinstance(arg, cls):
      return arg
    elif isinstance(arg, ComputedObject):
      return cls(arg)
    else:
      # Can't promote non-ComputedObjects up to Dictionary; no constructor.
      return arg
  elif klass == 'String':
    if (types.isString(arg) or
        isinstance(arg, ComputedObject) or
        isinstance(arg, String) or
        types.isVarOfType(arg, String)):
      return String(arg)
    else:
      return arg
  elif klass in globals():
    cls = globals()[klass]
    # Handle dynamically created classes.
    if isinstance(arg, cls):
      return arg
    elif isinstance(arg, basestring):
      if not hasattr(cls, arg):
        raise EEException('Unknown algorithm: %s.%s' % (klass, arg))
      return getattr(cls, arg)()
    else:
      return cls(arg)
  else:
    return arg
Example No. 58
def processSQuADtrain(trainfile, destfile, useQuestionClassificationAPI,
                      authcode):
    '''
    Generate rows with the tab-separated columns "Question, Gold,
    Top3_Distractors, Q_Coarse, Q_Fine, Gold_spaCy, Candidate_Type" for the
    SQuAD train dataset.
    :param trainfile: SQuAD train dataset, 'SQuAD/train-v2.0.json'
    :param destfile: destination file; the columns above are written as a tsv file
    :param useQuestionClassificationAPI: 'true'/'false'; for details, please refer to http://www.harishmadabushi.com/research/questionclassification/question-classification-api-documentation/
    :param authcode: auth code for the Question Classification API; please contact http://www.harishmadabushi.com/research/questionclassification/question-classification-api-documentation/
    :return: None
    '''
    with open(trainfile) as f:
        data = json.load(f)
    with open(destfile, 'w') as fw:
        fw.write('\t'.join([
            'Question', 'Gold', 'Top3_Distractors', 'Q_Coarse', 'Q_Fine',
            'Gold_spaCy', 'Candidate_Type'
        ]) + '\n')
        articleList = []
        for i, article in enumerate(
                data['data']):  # SQuAD train dataset num of articles = 442
            title = article['title']
            paragraphs = article['paragraphs']
            contextAll = ''
            QAList = []

            for paragraph in paragraphs:
                contextAll += paragraph['context'] + '\n'  # merge all paragraphs
            article = Article(title=title, contextAll=contextAll)
            article.nlp()  # named entity recognition
            article.entityBERTEmb = getEntityBERTEmb(article.entitySet)
            for paragraph in paragraphs:
                for qid, qa in enumerate(paragraph['qas']):

                    curQA = QA(question=qa['question'],
                               isImpossible=qa['is_impossible'])
                    response = None
                    if useQuestionClassificationAPI.lower() == 'true':
                        response = questionClassificationAPI(
                            curQA.question, authcode)
                    if response and response['status'] == 'Success':
                        curQA.questionCoarseType = response['major_type']
                        curQA.questionFineType = response['minor_type']
                    if curQA.isImpossible:
                        curQA.gold = None
                        curQA.goldStartIdx = None
                        curQA.goldEndIdx = None
                        curQA.goldNERType = None
                    else:
                        curQA.gold = qa['answers'][0]['text']
                        curQA.goldStartIdx = qa['answers'][0]['answer_start']
                        curQA.goldEndIdx = curQA.goldStartIdx + len(curQA.gold)
                        if curQA.gold in article.entityText:  # if gold exactly matches an entity
                            curQA.goldNERType = article.entityText[curQA.gold]
                        else:
                            curQA.goldNERType = None
                            for start in range(
                                    curQA.goldStartIdx, curQA.goldEndIdx
                            ):  # if gold contains part of an entity
                                if start in article.entityStartIdx:
                                    curQA.goldNERType = article.entityStartIdx[
                                        start]
                                    break
                    distractorCandidatesNERTypeSet, condition = getDistractorCandidatesNERTypeSet(
                        curQA.questionCoarseType, curQA.questionFineType,
                        curQA.goldNERType)
                    for nerType in distractorCandidatesNERTypeSet:
                        curQA.distractorCandidates.update(
                            article.entityDict[nerType])

                    QAList.append(curQA)

                    print('P' + str(i + 1) + 'Q' + str(qid + 1) + ': ' +
                          curQA.question)
                    fw.write(curQA.question + '\t')
                    print('Gold:', curQA.gold)
                    fw.write((curQA.gold if curQA.gold else 'None') + '\t')

                    # print(curQA.distractorCandidates)
                    # t3 = time.time()
                    finallist = []
                    goldBERTEmb = getGoldBERTEmb(curQA.gold)
                    quesBERTEmb = getQuesBERTEmb(curQA.question)
                    for d in curQA.distractorCandidates:
                        if curQA.gold and (curQA.gold.lower() in d[0].lower()
                                           or d[0].lower()
                                           in curQA.gold.lower()):
                            continue
                        feature = Feature(embed, vocab, curQA.question,
                                          curQA.gold, d[0], curQA.goldStartIdx,
                                          d[1], article.posDict,
                                          article.entityBERTEmb, goldBERTEmb,
                                          quesBERTEmb)

                        score = feature.score
                        finallist.append([score, d[0]])
                    # t4 = time.time()
                    finallist.sort(reverse=True)
                    print('My candidate:', [c[1] for c in finallist[:3]])
                    fw.write(str([c[1] for c in finallist[:3]]) + '\t')
                    print('Ques API tag:', curQA.questionCoarseType, ',',
                          curQA.questionFineType)
                    fw.write((curQA.questionCoarseType if curQA.questionCoarseType else 'None') + '\t' + \
                             (curQA.questionFineType if curQA.questionFineType else 'None') + '\t')
                    print('Gold spaCy tag:', curQA.goldNERType)
                    fw.write(
                        (curQA.goldNERType if curQA.goldNERType else 'None') +
                        '\t')
                    print('distractorCandidatesNERTypeSet',
                          distractorCandidatesNERTypeSet)
                    fw.write(str(distractorCandidatesNERTypeSet) + '\t\n')
                    print('')
                    # print('t2-t1, t4-t3', t2-t1, t4-t3)
                    # print('WikiData Description:', wbSearchEntities(curQA.gold))

            article.QAList = QAList
            articleList.append(article)
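A hedged invocation sketch: the input path comes from the docstring, while the output filename is a placeholder chosen here for illustration:

# Hypothetical driver; 'train_distractors.tsv' is an illustrative output name.
if __name__ == '__main__':
    processSQuADtrain('SQuAD/train-v2.0.json', 'train_distractors.tsv',
                      useQuestionClassificationAPI='false',  # skip the remote classifier
                      authcode=None)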
Example No. 59
    def __init__(self, field, phrases):
        self._phrases = phrases
        Feature.__init__(self, field)
Example No. 60
class Doc(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    final = Feature(TheLastWord, needs=stream, store=True)