Ejemplo n.º 1
0
def convert_type(instance, k, v, convert_timezone=True):
    """Coerce ``v`` to the Python type declared for column ``k``.

    The target type is read from
    ``instance.__table__.columns.<k>.type.python_type``.

    Args:
        instance: Object exposing a ``__table__`` with a ``columns``
            collection.
        k: Name of the column whose type drives the conversion.
        v: Raw value to convert; ``None`` is returned unchanged.
        convert_timezone: For ``datetime`` columns only. When true, the
            parsed value is passed through ``local_to_utc`` and its tzinfo
            is dropped; when false, the parsed value is returned as is.

    Returns:
        ``None`` if ``v`` is ``None``; otherwise the converted value.
    """
    if v is None:
        return None

    column = getattr(instance.__table__.columns, k)
    target_type = column.type.python_type

    # Text columns go through smart_unicode rather than the type itself.
    if issubclass(target_type, basestring):
        return smart_unicode(v)

    # Anything that is not a datetime is built by calling the type directly.
    if target_type is not datetime:
        return target_type(v)

    parsed = parser.parse(v)
    if not convert_timezone:
        return parsed
    return local_to_utc(parsed).replace(tzinfo=None)
Ejemplo n.º 2
0
 def __init__(self, name, file):
     """Build the entry from a file name and a readable Markdown source.

     The text read from ``file`` is rendered with the module-level
     ``markdowner``; selected metadata keys found in ``markdowner.Meta``
     are copied onto the instance, the summary is rendered as Markdown,
     and every attribute used as HTML is passed through ``bleacher``.

     Args:
         name: File name. Everything from the last "." onwards is dropped
             to form ``self.name``; a name without "." is kept whole.
         file: Object with a ``read()`` method returning the source text.
     """
     def through_none(f):
         # Wrap ``f`` so that a None input is passed through untouched.
         return lambda x: None if x is None else f(x)

     def attr_apply(obj, attr, f):
         # Replace obj.<attr> with f(obj.<attr>).  Uses the ``attr``
         # argument itself; the previous version silently relied on the
         # enclosing loop variable ``key``.
         setattr(obj, attr, f(getattr(obj, attr)))

     def render_markdown(text):
         # reset() clears the converter state left by the previous render.
         return markdowner.reset().convert(text)

     self.title = None
     self.summary = None
     self.tags = []

     # Remove extension.  rfind() returns -1 when there is no ".", which
     # as a slice bound would chop off the last character of the name.
     dot = name.rfind(".")
     self.name = name if dot == -1 else name[:dot]

     join_keys = set(["title", "summary"])
     markdown_keys = set(["summary"])
     bleach_keys = markdown_keys | set(["contents"])
     # "contents" always comes from the document body, never from the
     # metadata.  Note set(["contents"]): set("contents") would be a set
     # of single letters and would exclude nothing.
     metadata_keys = (
         set(["tags"]) | join_keys | bleach_keys) - set(["contents"])

     # Process the text
     encoded = smart_unicode(file.read())
     self.contents = render_markdown(encoded)

     # Import the relevant keys from the metadata into self
     for key in metadata_keys:
         if key in markdowner.Meta:
             value = markdowner.Meta[key]

             # Keys in join_keys are collapsed into one newline-separated
             # string; the others (tags) keep the value as stored in Meta.
             if key in join_keys:
                 value = "\n".join(value)

             setattr(self, key, value)

     # Render Markdown-formatted attributes to HTML
     for key in markdown_keys:
         attr_apply(self, key, through_none(render_markdown))

     # Bleach keys used as HTML
     for key in bleach_keys:
         attr_apply(self, key, through_none(bleacher))
Ejemplo n.º 3
0
 def test_smart_unicode(self):
     # Each pair is (input given to smart_unicode, value it must equal).
     expectations = (
         ("Hello", "Hello"),
         ("Привет", u"Привет"),
     )
     for raw, expected in expectations:
         eq_(smart_unicode(raw), expected)
Ejemplo n.º 4
0
 def __unicode__(self):
     """Return ``first_name`` and ``last_name`` joined by a space, as text."""
     full_name = self.first_name + " " + self.last_name
     return smart_unicode(full_name)
Ejemplo n.º 5
0
 def __unicode__(self):
     """Return ``event`` and ``category`` joined by a space, as text."""
     label = self.event + " " + self.category
     return smart_unicode(label)
Ejemplo n.º 6
0
 def __unicode__(self):
     """Return ``category_id`` converted to text by ``smart_unicode``."""
     identifier = self.category_id
     return smart_unicode(identifier)
Ejemplo n.º 7
0
 def __unicode__(self):
     """Return ``event_title`` converted to text by ``smart_unicode``."""
     title = self.event_title
     return smart_unicode(title)
Ejemplo n.º 8
0
 def __unicode__(self):
     """Return the event's title and the user's id joined by a space, as text."""
     label = self.event.event_title + " " + self.user.user_id
     return smart_unicode(label)
Ejemplo n.º 9
0
Archivo: final.py Proyecto: venurns/ocr
    # Crop the bounding-box region out of the original image
    # (rows are indexed by Y, columns by X).
    r = orig[startY:endY, startX:endX]

    # Tesseract options passed through pytesseract: English language data
    # (-l eng), OCR engine mode 1 and page segmentation mode 8.
    # NOTE(review): per the Tesseract manual, oem 1 is the LSTM engine and
    # psm 8 treats the image as a single word - confirm this is intended.
    configuration = ("-l eng --oem 1 --psm 8")
    # Recognize the text inside the cropped bounding box.
    text = pytesseract.image_to_string(r, config=configuration)

    # Append the bounding-box coordinates and the recognized text to the
    # list of results.
    results.append(((startX, startY, endX, endY), text))

# Draw on a copy so the original image stays untouched.
orig_image = orig.copy()

# NOTE(review): the API key is hard-coded in source; consider loading it
# from configuration or the environment instead.
# The client does not depend on the loop variable, so it is built once
# rather than once per detected box.
translate = YandexTranslate(
    'trnsl.1.1.20200222T042110Z.10474b882cdf0dd2.08d7b3579f016b9d507aaa6e6a7ec116a445d121'
)

# Walk over the OCR results, translate each text and draw it on the image.
for ((start_X, start_Y, end_X, end_Y), text) in results:
    text = smart_unicode(text)
    res = translate.translate(text, 'fr-en')
    # The response is a dict whose 'text' entry lists the translations.
    # Read it directly: slicing str(res) depends on the repr format
    # (u'...' prefix or not) and breaks when the text contains brackets.
    translations = res.get('text') or ['']
    text = translations[0]
    # Keep ASCII only before drawing; str() keeps the plain string type
    # the previous repr-based extraction produced.
    text = str("".join([x if ord(x) < 128 else "" for x in text]).strip())
    cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y), (0, 255, 0),
                  2)
    cv2.putText(orig_image, text, (start_X, start_Y), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)
Ejemplo n.º 10
0
# Command line: <train_file> <w2vec_file>
train_file = sys.argv[1]
w2vec_file = sys.argv[2]

# Mapping of word -> integer id, as returned by load_from_w2vec.
w_dic = load_from_w2vec(w2vec_file)

# Give every distinct tag the next free integer id, in order of first
# appearance.  Each non-blank line must split into exactly two
# whitespace-separated fields (word, tag); anything else raises ValueError.
with codecs.open(train_file, 'r', 'utf-8') as fin:
    for line in fin:
        line = line.strip()
        if not line:
            continue
        w, t = line.split()
        if t not in t_dic:
            t_dic[t] = len(t_dic)

error = 0

# ``with`` makes sure each output file is flushed and closed even if a
# write fails part-way through.
with codecs.open(out_dir + '/words.int', 'w', 'utf-8') as fout_dic:
    for w, wid in w_dic.items():
        w = smart_unicode(w)
        fout_dic.write("%s %d\n" % (w, wid))

with codecs.open(out_dir + '/tags.int', 'w', 'utf-8') as fout_tag:
    for t, tid in t_dic.items():
        fout_tag.write("%s %d\n" % (t, tid))