Esempio n. 1
0
def isUnitSquare(rational_points):
    """Report whether the convex hull of ``rational_points`` passes the unit-square test.

    The test requires all of: the hull area (as computed by
    ``poly_area_indexed``) equals 1, the hull has exactly four vertices, and
    the turn between the first two hull edges is within ``epsilon`` of a
    right angle.  Only that single corner is measured.

    Parameters:
        rational_points: indexable sequence of points; every point must
            offer ``toFloat()`` and ``[0]`` / ``[1]`` coordinate access.

    Returns:
        True when all three conditions hold, otherwise False.
    """
    # xxx rpoints and points must correspond xxx
    points = [ p.toFloat() for p in rational_points ]
    hull = ConvexHull(points)
    hull_points = [ rational_points[i] for i in hull.vertices ]
    line1 = Segment(Point(hull_points[0][0],hull_points[0][1]), Point(hull_points[1][0], hull_points[1][1]))
    line2 = Segment(Point(hull_points[1][0],hull_points[1][1]), Point(hull_points[2][0], hull_points[2][1]))

    # hull_points are guaranteed to be in counterclockwise order
    angle = None
    if line1.no_slope() and (not line2.no_slope() and line2.slope() == 0):
        # vertical edge followed by a horizontal edge: exact right angle
        angle = pi / 2
    elif line2.no_slope() and (not line1.no_slope() and line1.slope() == 0):
        # horizontal edge followed by a vertical edge: exact right angle
        angle = pi / 2
    else:
        # general case: measure the turn between the two edge directions
        l1f = line1.toFloat()
        l2f = line2.toFloat()
        print("lines %s %s" % (l1f, l2f))
        l1d = FloatPoint(l1f[1][0]-l1f[0][0],l1f[1][1]-l1f[0][1])
        l2d = FloatPoint(l2f[1][0]-l2f[0][0],l2f[1][1]-l2f[0][1])
        angle = atan2(l2d[1], l2d[0]) - atan2(l1d[1], l1d[0])

    area = poly_area_indexed(rational_points, hull.vertices)
    num_points = len(hull_points)
    angle_delta = fmod(angle - (pi/2), 2*pi)
    # fmod keeps the sign of its first argument, so a right-angle turn whose
    # atan2 difference wrapped around (e.g. -3*pi/2) ends up next to +/-2*pi
    # instead of next to 0.  Fold the result into [-pi, pi] before comparing.
    if angle_delta > pi:
        angle_delta -= 2*pi
    elif angle_delta < -pi:
        angle_delta += 2*pi
    print("IsSquare: area: %s Num Points:  %s Angle Delta from 90 degrees, in radians:  %s"
          % (area, num_points, angle_delta))
    return area == 1 and num_points == 4 and abs(angle_delta) < epsilon
Esempio n. 2
0
def _as_segment(data):
    """Build a Segment from the ``tags.p1`` / ``tags.p2`` entries of ``data``.

    When ``data`` also holds a ``tags.color`` entry, it is converted with
    ``_as_color`` and stored on the segment's ``color`` attribute.
    """
    segment = Segment(data[tags.p1], data[tags.p2])
    if tags.color in data:
        segment.color = _as_color(data[tags.color])
    return segment
Esempio n. 3
0
    def __init__(self,
                 base=[0, 0, 0],
                 seglen=30.0,
                 ang=0.00643732691573,  # focal length of 2m
                 r=5.151,
                 focal=200.0, conic=False):
        '''
        Build the front and back segments of the shell.

        Parameters:
            base:    the center point of the wide end of the segment
            seglen:  the axial length of each segment
            ang:     angle between the shell axis and the side of the front
                     segment
            r:       radius of the shell where the two segments meet
            focal:   forwarded to Segmentp / Segmenth; not used when plain
                     Segment objects are built
            conic:   the literal False builds a Segmentp front and a Segmenth
                     back; any other value builds plain Segment objects
        '''
        use_plain_segments = conic is not False

        if use_plain_segments:
            self.front = Segment(base=base, seglen=seglen, ang=ang, r1=r)
        else:
            # Paraboloid segment
            self.front = Segmentp(base=base, focal=focal, seglen=seglen, ang=ang, r1=r)

        # The back segment's base is the front base shifted by seglen along z.
        back_base = [base[0], base[1], base[2] + seglen]

        if use_plain_segments:
            self.back = Segment(base=back_base, seglen=seglen, ang=3*ang, r0=r)
        else:
            # Hyperboloid segment
            self.back = Segmenth(base=back_base, focal=focal, seglen=seglen, ang=ang, r0=r)
Esempio n. 4
0
def create_non_intersecting_segments(pts1, pts2, segs2, angles, ref):
    """Collect the Segments visible from ref when the pts1 line segment lies
    in front of the pts2 line segment.

    Args:
        pts1 (list[np.array]): 2D points where the rays from ref (directions
            given by angles) hit the Segment closest to ref; entries 0 and 3
            may be None
        pts2 (list[np.array]): 2D points where the same rays hit the Segment
            farthest from ref
        segs2 (list[Segment]): Segments between the points in pts2; only
            whether entries 0 and 2 are None is inspected here
        angles (list[float]): directions (radians) of the rays from ref,
            taken from the two extremes of each original Segment. The list
            always has 4 elements (even if repeated)
        ref (np.array): reference point

    Returns:
        list[Segment]: optionally a far Segment for rays 0-1, then the merged
        near Segment, then optionally a far Segment for rays 2-3.
    """
    # The middle stretch of the near segment always exists; the outer
    # stretches are merged into it when their end points are available.
    near = Segment(pts1[1], pts1[2], angles[1], angles[2], ref)
    if pts1[0] is not None:
        near.merge(Segment(pts1[0], pts1[1], angles[0], angles[1], ref))
    if pts1[3] is not None:
        near.merge(Segment(pts1[2], pts1[3], angles[2], angles[3], ref))

    visible = [near]
    if segs2[0] is not None:
        visible.insert(0, Segment(pts2[0], pts2[1], angles[0], angles[1], ref))
    if segs2[2] is not None:
        visible.append(Segment(pts2[2], pts2[3], angles[2], angles[3], ref))
    return visible
Esempio n. 5
0
 def row_to_object(row):
     """Build a Track from ``row``.

     Scalar fields are copied as-is; every entry of ``row.segments_old`` and
     ``row.segments_current`` is converted with ``Segment.row_to_object``.
     """
     track = Track()
     track.id = row.id
     track.name = row.name
     convert = Segment.row_to_object
     track.segments_old = list(map(convert, row.segments_old))
     track.segments_current = list(map(convert, row.segments_current))
     track.valid_time_start = row.valid_time_start
     track.valid_time_stop = row.valid_time_stop
     return track
Esempio n. 6
0
def test_meets_conditions():
    """Check which condition dictionaries a sample Segment accepts and rejects."""
    seg = Segment(['syllabic', 'voice'], ['consonantal', 'continuant'])

    accepted = [
        {},
        {'positive': ['syllabic']},
        {'positive': ['syllabic', 'voice'], 'negative': ['continuant']},
    ]
    for conditions in accepted:
        assert seg.meets_conditions(conditions)

    rejected = {'positive': ['lateral'], 'negative': ['continuant']}
    assert not seg.meets_conditions(rejected)
Esempio n. 7
0
File: main.py Progetto: oniboni/A-34
def main():
    """Feed a fixed string of hex digits through a LogicNet, one digit at a
    time, and print the characters of the Segment built from the last digit."""

    # cheap hex counter simulation :)
    test_input = "01 23 45 67 89 ab cd ef"   # "01 b0 c4 81 a6 c5 81 a0 5f 01 b7 5f"

    logic = LogicNet(int('b', 16))
    hex_digits = test_input.replace(' ', '')
    for digit in hex_digits:
        logic.read_input(int(digit, 16))
        # rebuilt on every digit; only the final one is printed below
        segment = Segment(logic.follow_paths())

    segment.print_chars()
Esempio n. 8
0
    def parse_segment(self, lc):
        """Read one segment load command from self.f and add it to self.macho.

        Reads the segment header fields in file order, builds a Segment from
        them (taking cmd and size from ``lc``), parses up to ``nsects``
        sections into it, applies the flags and registers the result with
        ``self.macho.add_lc``.

        Parameters:
            lc: load command object; only its ``cmd`` and ``size`` attributes
                are read here.
        """
        # NOTE(review): strip/get_int/get_ll/little are helpers defined
        # elsewhere; their exact semantics are not visible from this block.
        name = strip(self.f.read(16))
        # The four address/size fields use get_int for 32-bit files and
        # get_ll otherwise; the remaining four fields always use get_int.
        vmaddr = get_int(self.f) if self.macho.is_32_bit() else get_ll(self.f)
        vmsize = get_int(self.f) if self.macho.is_32_bit() else get_ll(self.f)
        offset = get_int(self.f) if self.macho.is_32_bit() else get_ll(self.f)
        segsize = get_int(self.f) if self.macho.is_32_bit() else get_ll(self.f)
        maxprot = get_int(self.f)
        initprot = get_int(self.f)
        nsects = get_int(self.f)
        flags = get_int(self.f)

        # Presumably a byte-order conversion ('I' for the 4-byte reads, 'Q'
        # for the 8-byte reads) -- confirm against the little() helper.
        if self.macho.is_little():
            vmaddr = little(vmaddr, 'I') if self.macho.is_32_bit() \
                else little(vmaddr, 'Q')
            vmsize = little(vmsize, 'I') if self.macho.is_32_bit() \
                else little(vmsize, 'Q')
            offset = little(offset, 'I') if self.macho.is_32_bit() \
                else little(offset, 'Q')
            segsize = little(segsize, 'I') if self.macho.is_32_bit() \
                else little(segsize, 'Q')
            maxprot = little(maxprot, 'I')
            initprot = little(initprot, 'I') 
            nsects = little(nsects, 'I')
            flags = little(flags, 'I')

        # Only the low three bits are used to look up the protection entry.
        maxprot = dictionary.protections[maxprot & 0b111]
        initprot = dictionary.protections[initprot & 0b111]        

        entropy = self.get_segment_entropy(offset, segsize)

        # flags is not passed to the constructor; it is applied further down
        # through parse_segment_flags.
        segment = Segment(cmd=lc.cmd, size=lc.size, name=name,
                          vmaddr=vmaddr, vmsize=vmsize, offset=offset,
                          segsize=segsize, maxprot=maxprot, initprot=initprot,
                          nsects=nsects, entropy=entropy)

        # Number of bytes that must still fit in the file before another
        # section is parsed (presumably the on-disk size of one section
        # record for 32-bit / 64-bit files -- verify).
        if self.macho.is_32_bit():
            sect_size = 68
        else:
            sect_size = 80
        for i in range(segment.nsects):
            # Stop parsing sections as soon as the next one would run past
            # the end of the file.
            if self.f.tell() + sect_size > self.file_size:
                data = {
                    'offset': self.f.tell(),
                    'file_size': self.file_size
                }
                # NOTE(review): this Abnormality is created but never stored
                # or reported anywhere in this block -- confirm whether it
                # should be recorded.
                a = Abnormality(title='SECTION OUT OF BOUNDS', data=data)
                break
            sect = self.parse_section()
            segment.add_sect(sect)

        self.parse_segment_flags(segment, flags)
        self.macho.add_lc(segment)
Esempio n. 9
0
    def parseSegment(self, lc):
        """Read one segment load command from self._f and add it to self._macho.

        Reads the segment header fields in file order, builds a Segment from
        them (taking cmd and size from ``lc``), parses up to ``nsects``
        sections into it, applies the flags and registers the result with
        ``self._macho.addLC``.

        Parameters:
            lc: load command object; only ``getCmd()`` and ``getSize()`` are
                called on it here.
        """
        # NOTE(review): strip/getInt/getLL/little are helpers defined
        # elsewhere; their exact semantics are not visible from this block.
        name = strip(self._f.read(16))
        # The four address/size fields use getInt for 32-bit files and getLL
        # otherwise; the remaining four fields always use getInt.
        vmaddr = getInt(self._f) if self._macho.is32Bit() else getLL(self._f)
        vmsize = getInt(self._f) if self._macho.is32Bit() else getLL(self._f)
        offset = getInt(self._f) if self._macho.is32Bit() else getLL(self._f)
        segsize = getInt(self._f) if self._macho.is32Bit() else getLL(self._f)
        maxprot = getInt(self._f)
        initprot = getInt(self._f)
        nsects = getInt(self._f)
        flags = getInt(self._f)

        # Presumably a byte-order conversion ('I' for the 4-byte reads, 'Q'
        # for the 8-byte reads) -- confirm against the little() helper.
        if self._macho.isLittle():
            vmaddr = little(vmaddr, 'I') if self._macho.is32Bit() \
                else little(vmaddr, 'Q')
            vmsize = little(vmsize, 'I') if self._macho.is32Bit() \
                else little(vmsize, 'Q')
            offset = little(offset, 'I') if self._macho.is32Bit() \
                else little(offset, 'Q')
            segsize = little(segsize, 'I') if self._macho.is32Bit() \
                else little(segsize, 'Q')
            maxprot = little(maxprot, 'I')
            initprot = little(initprot, 'I') 
            nsects = little(nsects, 'I')
            flags = little(flags, 'I')

        # Only the low three bits are used to look up the protection entry.
        maxprot = dictionary.protections[maxprot & 0b111]
        initprot = dictionary.protections[initprot & 0b111]        

        # flags is not passed to the constructor; it is applied further down
        # through parseSegmentFlags.
        segment = Segment(cmd=lc.getCmd(), size=lc.getSize(), name=name,
                          vmaddr=vmaddr, vmsize=vmsize, offset=offset,
                          segsize=segsize, maxprot=maxprot, initprot=initprot,
                          nsects=nsects)

        # Number of bytes that must still fit in the file before another
        # section is parsed (presumably the on-disk size of one section
        # record for 32-bit / 64-bit files -- verify).
        if self._macho.is32Bit():
            sect_size = 68
        else:
            sect_size = 80
        for i in range(segment.getNSects()):
            # Stop parsing sections as soon as the next one would run past
            # the end of the file.
            if self._f.tell() + sect_size > self._file_size:
                data = {
                    'offset': self._f.tell(),
                    'file_size': self._file_size
                }
                # NOTE(review): this Abnormality is created but never stored
                # or reported anywhere in this block -- confirm whether it
                # should be recorded.
                a = Abnormality(title='SECTION OUT OF BOUNDS', data=data)
                break
            sect = self.parseSection()
            segment.addSect(sect)

        self.parseSegmentFlags(segment, flags)
        self._macho.addLC(segment)
Esempio n. 10
0
def object_hook(d):
    """
    Turn a decoded JSON dict into the object it encodes.

    The marker keys for segment, timeline, annotation and transcription are
    checked in that order; the first one present in `d` selects the class
    whose `from_json` builds the result.  A dict without any marker is
    returned unchanged.

    Usage
    -----
    >>> import simplejson as json
    >>> with open('file.json', 'r') as f:
    ...   json.load(f, object_hook=object_hook)
    """

    from segment import Segment
    from timeline import Timeline
    from annotation import Annotation
    from transcription import Transcription

    # (marker key, class) pairs, in lookup order
    decoders = (
        (PYANNOTE_JSON_SEGMENT, Segment),
        (PYANNOTE_JSON_TIMELINE, Timeline),
        (PYANNOTE_JSON_ANNOTATION, Annotation),
        (PYANNOTE_JSON_TRANSCRIPTION, Transcription),
    )
    for marker, klass in decoders:
        if marker in d:
            return klass.from_json(d)

    return d
    def __init__(self, track, comp_location, start, orig_duration, new_duration):
        """Create a time-stretched segment.

        Behaves like a :py:class:`radiotool.composer.Segment`, except that
        the target duration is given explicitly. The segment will then
        resample its frames to meet this duration.

        :param track: Track to slice
        :type track: :py:class:`radiotool.composer.Track`
        :param float comp_location: Location in composition to play this segment (in seconds)
        :param float start: Start of segment (in seconds)
        :param float orig_duration: Original duration of segment (in seconds)
        :param float new_duration: Target (stretched) duration of segment (in seconds)
        """
        # The base class is initialised with the stretched duration.
        Segment.__init__(self, track, comp_location, start, new_duration)
        # The original duration is kept as an integer: seconds * samplerate.
        scaled_duration = orig_duration * self.samplerate
        self.orig_duration = int(scaled_duration)
Esempio n. 12
0
def token_to_segment(token, segment_list, diacritic_list):
    '''Build the Segment for an IPA string token.

    segment_list and diacritic_list are lists of dictionaries describing
    the known segments and diacritics. The base segment is looked up with
    find_segment after diacritic characters are removed from the token, then
    each diacritic found in the token is added onto it.'''

    diacritic_symbols = [entry['IPA'] for entry in diacritic_list]

    # What is left of the token once diacritic characters are dropped
    base_string = ''.join(char for char in token
                          if char not in diacritic_symbols)

    # Diacritic entries whose symbol occurs in the token
    present = [entry for entry in diacritic_list if entry['IPA'] in token]

    result = Segment.from_dictionary(find_segment(base_string, segment_list))

    # Fold every diacritic's features into the base segment
    for entry in present:
        applies = entry['applies']
        result = result + Segment(applies.get('positive', []),
                                  applies.get('negative', []))

    return result
Esempio n. 13
0
 def __init__(self,
              base = [0,0,0],
              seglen = 30.0,
              focal = 200.0,
              radii = [5.151,4.9,4.659,4.429,4.21,4.0,3.799],
              angles = None
              ):
     '''
     Build one Shell per radius plus the inner core and its two end faces.

     Parameters:
         base:    the center point of the wide end of the segment
         seglen:  the axial length of each segment
         focal:   the focal length, measured from the center of the module
         radii:   a list of radii, one for each shell from biggest to smallest
         angles:  optional parameter to overwrite the shell angles computed by constructor

     Raises:
         ValueError: when angles is given and its length differs from radii
     '''
     if angles is not None:
         if len(radii) != len(angles):
             raise ValueError('number of radii and number of angles do not match')
     else:
         angles = calcShellAngle(radii,focal)

     self.shells = [Shell(base=base, seglen=seglen, ang=angles[idx], r=radius)
                    for idx, radius in enumerate(radii)]

     # inner core (blocks rays going through center of module)
     innermost = self.shells[-1]
     r0 = innermost.back.r0
     r1 = r0 - seglen * tan(4*angles[-1])
     ang = atan((r0-r1)/(2*seglen))
     self.core = Segment(base=base, seglen=2*seglen, ang=ang, r0=r0)
     # one face at the base, one face 2*seglen further along z
     near_face = Circle(center=base,normal=[0,0,1],radius=r0)
     far_face = Circle(center=[base[0],base[1],base[2]+2*seglen],normal=[0,0,-1],radius=r1)
     self.coreFaces = [near_face, far_face]
Esempio n. 14
0
def test_initialisation():
    """Check the positive/negative lists of a Segment built from a feature dictionary."""
    features = {
        'stress': '+',
        'long': '-',
        'continuant': '0',
        'IPA': 'b',
    }

    built = Segment.from_dictionary(features)

    assert built.positive == ['stress']
    assert built.negative == ['long']
Esempio n. 15
0
 def from_json(cls, data):
     """Rebuild an instance of ``cls`` from its JSON dictionary ``data``.

     The URI and modality entries are optional (None when absent). Every
     (segment, track, label) triple under PYANNOTE_JSON_ANNOTATION is stored
     in the new object, with the segment decoded by ``Segment.from_json``.
     """
     result = cls(uri=data.get(PYANNOTE_URI, None),
                  modality=data.get(PYANNOTE_MODALITY, None))
     for segment_json, track, label in data[PYANNOTE_JSON_ANNOTATION]:
         result[Segment.from_json(segment_json), track] = label
     return result
Esempio n. 16
0
 def __init__(self,
              base = [0,0,0],
              seglen = 30.0,
              ang = 0.00643866102405, # focal length of 2m
              r = 5.151
              ):
     '''
     Build the front and back Segment of the shell.

     Parameters:
         base:    the center point of the wide end of the segment
         seglen:  the axial length of each segment
         ang:     angle between the shell axis and the side of the front segment
         r:       radius of the shell where the two segments meet
     '''
     front = Segment(base=base, seglen=seglen, ang=ang, r1=r)
     self.front = front
     # The back segment starts seglen further along z and is built with
     # three times the front angle.
     back_origin = [base[0], base[1], base[2]+seglen]
     back = Segment(base=back_origin, seglen=seglen, ang=3*ang, r0=r)
     self.back = back
Esempio n. 17
0
def test_addition():
    """Check the positive/negative lists produced by adding two Segments."""
    base = Segment.from_dictionary({
        'stress': '+',
        'syllabic': '-',
        'continuant': '0',
        'IPA': 'b',
    })
    diacritic = Segment(['syllabic'], ['voice'])

    combined = base + diacritic

    assert combined.positive == ['stress', 'syllabic']
    assert combined.negative == ['voice']
Esempio n. 18
0
    def from_json(cls, data):
        """Rebuild an instance of ``cls`` from its JSON dictionary ``data``.

        The URI and modality entries are optional (None when absent). Each
        entry under PYANNOTE_JSON_CONTENT supplies a segment (decoded with
        ``Segment.from_json``), a track and a label, which are stored in the
        new object.
        """
        result = cls(uri=data.get(PYANNOTE_URI, None),
                     modality=data.get(PYANNOTE_MODALITY, None))

        for entry in data[PYANNOTE_JSON_CONTENT]:
            key = (Segment.from_json(entry[PYANNOTE_SEGMENT]),
                   entry[PYANNOTE_TRACK])
            result[key] = entry[PYANNOTE_LABEL]

        return result
Esempio n. 19
0
 def row_to_object(row):
     """Build a Run from ``row``.

     ``id`` comes from the row itself; every other field is read from
     ``row.run_info``, with the segment converted by
     ``Segment.row_to_object``. The type of ``time_start`` is printed as a
     debugging aid.
     """
     run = Run()
     run.id = row.id

     info = row.run_info
     run.user_id = info.user_id
     run.user_bt_name = info.user_bt_name
     run.segment = Segment.row_to_object(info.segment)

     print('type of time start cell')
     print(str(type(info.time_start)))

     run.time_start = info.time_start
     run.time_stop = info.time_stop
     run.time_span_ms = info.time_span_ms
     return run
def benchmark_match_accuracy(segments, diacritics, filename):
    '''Measure how well segments survive a round trip through deparsing.

    Every segment dictionary is turned into a Segment and deparsed against
    the feature strings loaded from ``filename``; the deparsed value is
    compared with the segment's own IPA string. The same is done for every
    segment + diacritic combination whose conditions the segment meets.

    Parameters:
        segments: iterable of segment dictionaries (each needs an 'IPA' key)
        diacritics: iterable of diacritic dictionaries with 'IPA',
            'conditions' and 'applies' keys
        filename: name of the feature string file under engine/data

    Returns:
        A pair of fractions between 0 and 1: (accuracy over base segments
        only, accuracy over base segments plus diacritic combinations).
        A fraction is 0.0 when nothing was compared.
    '''
    feature_strings = load_feature_strings(path.join(base_directory, 'engine',
                                                     'data', filename))

    print('Loaded {0} feature strings'.format(len(feature_strings)))

    base_matches = []
    matches = []

    deparse.initialise_cache()

    for segment in segments:
        base_segment = Segment.from_dictionary(segment)

        # Deparse each base segment once; the result counts towards both
        # the base tally and the overall tally.
        base_match = (segment['IPA'],
                      deparse.segment_match(feature_strings, base_segment))
        base_matches.append(base_match)
        matches.append(base_match)

        for diacritic in diacritics:
            IPA_representation = segment['IPA'] + diacritic['IPA']

            if base_segment.meets_conditions(diacritic['conditions']):
                applies = diacritic['applies']
                diacritic_segment = base_segment + Segment(applies.get('positive', []),
                                                           applies.get('negative', []))

                matches.append((IPA_representation,
                                deparse.segment_match(feature_strings,
                                                      diacritic_segment)))

    def _fraction(hits, total):
        # float() keeps true division under Python 2 as well; an empty
        # benchmark scores 0.0 instead of raising ZeroDivisionError.
        return float(hits) / total if total else 0.0

    print('Calculating base accuracy...')
    base_successes = 0
    for expected, deparsed in base_matches:
        if expected == deparsed:
            base_successes += 1
        else:
            print('\tExpected {0}, deparsed {1}'.format(expected, deparsed))

    print('Calculating diacritic accuracy...')
    successes = sum(1 for expected, deparsed in matches if expected == deparsed)

    return (_fraction(base_successes, len(base_matches)),
            _fraction(successes, len(matches)))
Esempio n. 21
0
def test_setters():
    """Check the feature lists after add_positive / add_negative, including
    a repeated add_negative call."""
    seg = Segment.from_dictionary({
        'stress': '+',
        'long': '-',
        'continuant': '0',
        'IPA': 'b',
    })

    # (setter name, feature, expected positive list, expected negative list)
    steps = [
        ('add_positive', 'long', ['stress', 'long'], []),
        ('add_negative', 'stress', ['long'], ['stress']),
        ('add_negative', 'stress', ['long'], ['stress']),
    ]
    for setter, feature, expected_positive, expected_negative in steps:
        getattr(seg, setter)(feature)
        assert seg.positive == expected_positive
        assert seg.negative == expected_negative
def _travel_path(origin_id, start_id):
	"""Follow passthru links from start_id and record every id visited.

	Walking stops after recording an id when that id equals origin_id, when
	its node has a return, or when its node has a branch that itself has no
	return. Nodes are looked up as a Segment first, then as a Block.

	Returns (list of visited ids, True if the walk ended at such a branch).
	"""
	visited = []
	hit_branch = False
	current = start_id
	while True:
		visited.append(current)
		node = Segment.by_id(current) or Block.by_id(current)
		if origin_id == current or node.has_return():
			break
		if node.branch() and not node.branch().has_return():
			hit_branch = True
			break
		current = node.passthru().id
	return visited, hit_branch
def _new_segment(segment, segments, included_in, members):
	"""Register segment in the bookkeeping structures passed in.

	Appends the segment id to segments, records in included_in which
	segment each nested Segment child belongs to (first writer wins), and
	stores in members[id] the direct children, the segment's own id and the
	members of every nested Segment child.
	"""
	seg_id = segment.id
	segments.append(seg_id)

	# Direct children depend on the segment type.
	kind = segment.type
	if kind == Segment.BRANCH:
		children = segment.left[:]
		children += segment.right[:]
	elif kind == Segment.LOOP:
		children = segment.body[:]
	else:
		children = []

	nested = []
	for child_id in children:
		if not Segment.by_id(child_id):
			continue
		if child_id not in included_in: # temporary
			included_in[child_id] = seg_id
		nested.extend(members[child_id])
		nested.remove(child_id) # remove dup

	children.append(seg_id)
	children.extend(nested)
	members[seg_id] = children
Esempio n. 24
0
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load feature files that come from a trusted source.
with open('trackToFeatureTuples.p', 'rb') as feature_file:
    track_features_small = pickle.load(feature_file)
clusters = km.kmeans(track_features_small, 10)

track_features_full = j.constructNamesToDict()
cluster_num = 0

for cluster in clusters:
    # every complete run of 30 consecutive Segment objects, over all tracks
    # in this cluster
    cluster_30segments = []
    # the first run of 30 of each track that has at least 30 segments
    starts = []
    for track in cluster:
        segments = track_features_full[track]["segments"]

        segment30 = []
        for i in xrange(len(segments)):
            s = Segment(track, i, segments[i])
            segment30.append(s)
            # a run is complete after every 30th segment; a trailing partial
            # run is discarded
            if (i + 1) % 30 == 0:
                cluster_30segments.append(segment30)
                if i + 1 == 30:
                    starts.append(segment30)
                segment30 = []

    # NOTE(review): random.choice raises IndexError when no track in the
    # cluster has 30 segments -- confirm clusters always contain one.
    start = random.choice(starts)

    sp = mashupSearchProblem(cluster_30segments, start, 5, sc.segmentsCost)
    ucs_alg = ucs.UniformCostSearch()
    ucs_alg.solve(sp)
    # the chosen start run followed by the segments of every search action
    mashup = [s for s in start] + [s for a in ucs_alg.actions for s in a]

    segment_titles = []
Esempio n. 25
0
 def from_json(cls, data):
     """Rebuild an instance of ``cls`` from its JSON dictionary ``data``.

     The URI entry is optional (None when absent); every item under
     PYANNOTE_JSON_CONTENT is decoded with ``Segment.from_json``.
     """
     uri = data.get(PYANNOTE_URI, None)
     segments = []
     for item in data[PYANNOTE_JSON_CONTENT]:
         segments.append(Segment.from_json(item))
     return cls(segments=segments, uri=uri)
Esempio n. 26
0
 def from_json(cls, data):
     """Rebuild an instance of ``cls`` from its JSON dictionary ``data``.

     Every item under PYANNOTE_JSON_TIMELINE is decoded with
     ``Segment.from_json``; the URI entry is optional (None when absent).
     """
     decoded = list(map(Segment.from_json, data[PYANNOTE_JSON_TIMELINE]))
     source_uri = data.get(PYANNOTE_URI, None)
     return cls(segments=decoded, uri=source_uri)
Esempio n. 27
0
class Shell:
    '''
    Two segments stacked along z: a front segment and a back segment whose
    base sits seglen further along the z axis. Rays coming from sources in
    the negative z range therefore reach the front segment first.
    '''

    def __init__(self,
                 base = [0,0,0],
                 seglen = 30.0,
                 ang = 0.00643866102405, # focal length of 2m
                 r = 5.151
                 ):
        '''
        Build the front and back Segment of the shell.

        Parameters:
            base:    the center point of the wide end of the segment
            seglen:  the axial length of each segment
            ang:     angle between the shell axis and the side of the front segment
            r:       radius of the shell where the two segments meet
        '''
        front_segment = Segment(base=base, seglen=seglen, ang=ang, r1=r)
        self.front = front_segment
        # the back segment is built with three times the front angle
        back_base = [base[0], base[1], base[2]+seglen]
        self.back = Segment(base=back_base, seglen=seglen, ang=3*ang, r0=r)

    def getSurfaces(self):
        '''
        Returns the front and back segments as a two-element list
        '''
        surfaces = [self.front, self.back]
        return surfaces

    def plot2D(self, axes, color = 'b'):
        '''
        Draws a 2d cross section of both segments on axes, followed by two
        dotted yellow ray lines
        '''
        for part in (self.front, self.back):
            part.plot2D(axes, color)

        # plot rays
        front_angle = self.front.ang
        radius = self.back.r1
        back_z = self.back.base[2]
        reach = radius*tan((pi/2)-4*front_angle)
        ray_x = (2*back_z, 2*back_z+reach)
        axes.plot(ray_x, (radius, 0), 'y:')
        axes.plot(ray_x, (-radius, 0), 'y:')

    def plot3D(self, axes, color = 'b'):
        '''
        Draws both segments in 3d on the given axes
        '''
        for part in (self.front, self.back):
            part.plot3D(axes, color)

    def targetFront(self,a,b):
        '''
        Takes two list arguments of equal size, the elements of which range from 0 to 1.
        Returns an array of points that exist on the circle defined by the wide end of
        the shell (delegates to the front segment).
        '''
        front = self.front
        return front.targetFront(a, b)

    def targetBack(self,a,b):
        '''
        Takes two list arguments of equal size, the elements of which range from 0 to 1.
        Returns an array of points that exist on the circle defined by the small end of
        the shell (delegates to the back segment).
        '''
        back = self.back
        return back.targetBack(a, b)
     
        
Esempio n. 28
0
 def createSegment(self, vt1, vt2):
     """Return a new Segment constructed with this object as its argument
     and positioned with ``setPos(vt1, vt2)``."""
     created = Segment(self)
     created.setPos(vt1, vt2)
     return created
Esempio n. 29
0
 def __init__(self, host, port):
     """Run ``self.load()``, then create the Segment client for host/port
     and keep it as ``self.segmentor``."""
     self.load()
     client = Segment(host, port)
     self.segmentor = client
Esempio n. 30
0
def set_process(sentences, delimiter='<!>', line_break='\n', option=None):
    """Parse every sentence in ``sentences`` and return the collected output.

    ``sentences`` is split on ``delimiter`` into chunks. Each chunk is split
    on ``line_break``; empty lines and lines starting with "<" are dropped
    and tabs are replaced by spaces. Every non-empty chunk becomes a Segment
    that is parsed and printed into a TextFile in the selected format,
    followed by a newline.

    Parameters:
        sentences: the input text
        delimiter: separator between sentences
        line_break: separator between the lines of one sentence
        option: output selector -- '-d' (dependency tree), '-p' (phrase
            tree), '--cout' (collx output), '--phrases' or '--laa'; any
            other value selects no output format

    Returns:
        whatever ``TextFile.get()`` returns after all chunks were written.

    Side effects:
        installs a SIGUSR1 handler that exits the process, and arms a
        SIGALRM watchdog of 10 seconds per chunk for the duration of the
        call. No SIGALRM handler is installed here.
    """
    dep_out = False
    phr_out = False
    relaxed_out = False
    laa_out = False
    collx_out = False
    use_collx = False
    phrases = False
    print_tree = False
    phr_verbs_path = ''
    pos_tags = True
    desamb_hacks = False

    grammar = os.path.join(set_path, 'grammar.set')
    lw_collx = os.path.join(set_path, 'data', 'lemma-word')
    ll_collx = os.path.join(set_path, 'data', 'lemma-lemma')
    lt_collx = os.path.join(set_path, 'data', 'lemma-tag')
    ls_collx = os.path.join(set_path, 'data', 'lemma-semclass')
    ss_collx = os.path.join(set_path, 'data', 'semclass-semclass')

    if   option == '-d': dep_out = True
    elif option == '-p': phr_out = True
    elif option == '--cout': collx_out = True
    elif option == '--phrases': phrases = True
    elif option == '--laa': laa_out = True

    # first word of each line -> list of tuples holding the remaining words
    phr_verbs = {}
    if phr_verbs_path:
        with open(phr_verbs_path) as phr_verbs_file:
            for line in phr_verbs_file:
                llist = line.split()
                if not llist:
                    # a blank line carries no entry
                    continue
                phr_verbs.setdefault(llist[0], []).append(tuple(llist[1:]))

    grammar = Grammar(grammar)
    if use_collx:
        parser = Parser(grammar, lw_collx_root=lw_collx,
            ll_collx_root=ll_collx, lt_collx_root=lt_collx,
            ls_collx_root=ls_collx, ss_collx_root=ss_collx)
    else:
        parser = Parser(grammar)

    def sigusr1_handler(*args):
        sys.exit(1)

    signal.signal(signal.SIGUSR1, sigusr1_handler)
    chunks =  sentences.split(delimiter)
    signal.alarm(len(chunks) * 10)

    f = TextFile()
    try:
        for sentence in chunks:
            sentence_chunks = [w.replace('\t', ' ')
                for w in sentence.split(line_break) if w and w[0] != "<"]
            if len(sentence_chunks) == 0:
                continue
            s = Segment(sentence_chunks, pos_tags=pos_tags,
                desamb_hacks=desamb_hacks, phr_verbs=phr_verbs)
            parser.parse(s)
            if dep_out: s.print_dep_tree(root=False, file=f)
            if laa_out: s.print_laa_tree(file=f)
            if phr_out: s.print_phr_tree(file=f)
            if relaxed_out: s.print_relaxed_tree(file=f)
            if collx_out: s.print_collx_out(file=f)
            if print_tree: s.print_tree(file=f)
            if phrases: s.print_phrases(file=f)
            f.write('\n')
    finally:
        # Disarm the watchdog: left pending, SIGALRM would hit the caller
        # some time after this function has already returned.
        signal.alarm(0)
    return f.get()
Esempio n. 31
0
class Classfier():
    """Text classifier that scores a document against per-category centroids.

    A document is segmented into "word:tf" keywords by a remote segmenter,
    turned into tf*idf weighted features, and assigned the category whose
    centroid gives the largest weighted dot product.
    """
    segmentor = None
    # Class-level defaults only; __init__ gives every instance its own dicts.
    features={}
    categorys={}
    model={}

    def __init__(self, host, port):
        """Load the model files and create the segmenter client.

        Parameters:
            host, port: passed unchanged to Segment(host, port)
        """
        # load() fills these dicts in place; without per-instance dicts all
        # instances would write into the same class-level objects.
        self.features = {}
        self.categorys = {}
        self.model = {}
        self.load()
        self.segmentor = Segment(host, port)

    def __del__(self):
        """Drop the references held by this instance."""
        if self.segmentor:
            self.segmentor = None
        self.features = None
        self.categorys = None
        self.model = None

    def load(self):
        """Fill self.model, self.features and self.categorys from disk.

        The three file paths are relative to the current working directory.
        """
        feature_file = 'thrift/etc/feature.txt'
        category_file = 'thrift/etc/catid.txt'
        model_file = 'thrift/data/knn.model'

        # load centoroid: "<index>|<k>:<v> <k>:<v> ..." per line;
        # reading stops at the first blank line
        with open(model_file) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    break
                index, rest = line.split('|')
                centoroid = {}
                for part in rest.split(" "):
                    k, v = part.split(':')
                    centoroid[int(k)] = float(v)
                self.model[int(index)] = centoroid

        # load features: "<index>\t<word>\t<idf>" per line
        with open(feature_file) as handle:
            for line in handle:
                line = line.strip()
                parts = line.split("\t")
                feature = {
                    "index": int(parts[0]),
                    "idf" : float(parts[2])
                }
                word = parts[1]
                self.features[word] = feature

        # load category: "<catid>\t<leaf>\t<root>" per line; only the leaf
        # name is kept
        with open(category_file) as handle:
            for line in handle:
                line = line.strip()
                catid, leaf, root = line.split("\t")
                self.categorys[int(catid)] = leaf

    def predict(self, content):
        """Return the category label predicted for the text ``content``."""
        features = self.text2Feature(content)
        return self.maxProbCat(features)

    def segment(self, content):
        """Send ``content`` to the segmenter and return its parsed result."""
        self.segmentor.connect()
        try:
            result = self.segmentor.segment(content)
        finally:
            # close the connection even when the request itself fails
            self.segmentor.close()
        return self.segmentor.parse(result)

    def similarity(self, docFeatures, centoroid):
        """Return the dot product of the document weights with ``centoroid``.

        Parameters:
            docFeatures: dict mapping feature index -> dict with a 'weight'
            centoroid: dict mapping feature index -> centroid value
        """
        value = 0.0
        for index, item in docFeatures.items():
            if index in centoroid:
                value += item['weight'] * centoroid[index]
        return value

    def text2Feature(self, content):
        """Turn ``content`` into a dict of weighted features.

        Each keyword returned by segment() has the form "word:tf". Words
        that are not in self.features are ignored; the others map their
        feature index to {"word": word, "weight": tf * idf}.
        """
        keywords = self.segment(content)
        features = {}
        for wordTf in keywords:
            word, tf = wordTf.split(':')
            tf = float(tf)
            if word in self.features:
                feature = self.features[word]
                index = feature["index"]
                idf   = feature["idf"]
                features[index] = {"word":word, "weight":tf*idf}
        return features

    def maxProbCat(self, features):
        """Return the label of the best-scoring category for ``features``.

        When no category scores above 0 the lookup uses the key -1, so
        self.categorys must contain -1 or a KeyError is raised.
        """
        maxScore = 0.0
        probCat  = -1

        for catid in self.model:
            score = self.similarity(features, self.model[catid])
            if score > maxScore:
                maxScore = score
                probCat = catid

        label = self.categorys[probCat]
        return label