def convert_type(instance, k, v, convert_timezone=True):
    """Coerce ``v`` to the Python type declared for column ``k``.

    The target type is read from
    ``instance.__table__.columns.<k>.type.python_type``.

    :param instance: object exposing ``__table__.columns``.
    :param k: name of the column whose type drives the conversion.
    :param v: raw value to convert; ``None`` is passed through untouched.
    :param convert_timezone: for datetime columns only. When true the parsed
        value is run through ``local_to_utc`` and its ``tzinfo`` is dropped;
        when false the parsed value is returned as-is.
    :returns: ``None`` for ``None`` input, ``smart_unicode(v)`` for string
        columns, a parsed datetime for datetime columns, and
        ``python_type(v)`` for everything else.
    """
    if v is None:
        return None

    column = getattr(instance.__table__.columns, k)
    target_type = column.type.python_type

    # Text columns: normalise through smart_unicode.
    if issubclass(target_type, basestring):
        return smart_unicode(v)

    # Anything that is neither text nor datetime: call the type directly.
    if target_type is not datetime:
        return target_type(v)

    # Datetime columns: parse first, then optionally shift to UTC.
    parsed = parser.parse(v)
    if not convert_timezone:
        return parsed
    return local_to_utc(parsed).replace(tzinfo=None)
def __init__(self, name, file):
    """Populate the instance from a Markdown source file.

    The file is read, decoded with ``smart_unicode`` and converted with the
    shared ``markdowner``. Selected metadata entries found in
    ``markdowner.Meta`` are copied onto ``self``; ``summary`` is itself
    rendered as Markdown, and every attribute used as HTML is passed through
    ``bleacher``.

    :param name: source file name; everything from the last ``"."`` onward
        is dropped to form ``self.name``. A name without a dot is kept whole.
    :param file: object with a ``read()`` method returning the raw text.
    """
    def through_none(f):
        # Wrap ``f`` so that a None input short-circuits to None.
        return lambda x: None if x is None else f(x)

    def attr_apply(obj, attr, f):
        # Replace obj.<attr> with f(obj.<attr>).
        setattr(obj, attr, f(getattr(obj, attr)))

    def render_markdown(text):
        return markdowner.reset().convert(text)

    self.title = None
    self.summary = None
    self.tags = []

    # Remove extension. rfind() returns -1 when there is no dot, and slicing
    # with -1 would silently chop the last character, so guard against it.
    dot = name.rfind(".")
    self.name = name[:dot] if dot != -1 else name

    join_keys = set(["title", "summary"])
    markdown_keys = set(["summary"])
    bleach_keys = markdown_keys | set(["contents"])
    # "contents" comes from the document body, never from metadata.
    # set(["contents"]) is required here: set("contents") would be a set of
    # single characters and would remove nothing.
    metadata_keys = (set(["tags"]) | join_keys | bleach_keys) - set(["contents"])

    # Process the text
    encoded = smart_unicode(file.read())
    self.contents = markdowner.reset().convert(encoded)

    # Import the relevant keys from the metadata into self
    for key in metadata_keys:
        if key in markdowner.Meta:
            value = markdowner.Meta[key]
            if key in join_keys:
                # Joined keys are stored as sequences of lines.
                value = "\n".join(value)
            setattr(self, key, value)

    # Render Markdown-formatted attributes (skipping unset ones).
    for key in markdown_keys:
        attr_apply(self, key, through_none(render_markdown))

    # Bleach keys used as HTML
    for key in bleach_keys:
        attr_apply(self, key, through_none(bleacher))
def test_smart_unicode(self):
    """Check smart_unicode against an ASCII and a Cyrillic sample."""
    cases = (
        ("Hello", "Hello"),
        ("Привет", u"Привет"),
    )
    for raw, expected in cases:
        eq_(smart_unicode(raw), expected)
def __unicode__(self):
    """Return ``first_name`` and ``last_name`` separated by a space.

    The combined string is passed through ``smart_unicode``.
    """
    full_name = self.first_name + " " + self.last_name
    return smart_unicode(full_name)
def __unicode__(self):
    """Return ``event`` and ``category`` separated by a space.

    The combined string is passed through ``smart_unicode``.
    """
    label = self.event + " " + self.category
    return smart_unicode(label)
def __unicode__(self):
    """Return ``category_id`` passed through ``smart_unicode``."""
    identifier = self.category_id
    return smart_unicode(identifier)
def __unicode__(self):
    """Return ``event_title`` passed through ``smart_unicode``."""
    title = self.event_title
    return smart_unicode(title)
def __unicode__(self):
    """Return the event's title and the user's id separated by a space.

    The combined string is passed through ``smart_unicode``.
    """
    label = self.event.event_title + " " + self.user.user_id
    return smart_unicode(label)
# NOTE(review): the crop / OCR / append statements below presumably run once
# per detected box inside an enclosing loop whose header is not visible in
# this fragment -- confirm the indentation against the surrounding code.
r = orig[startY:endY, startX:endX]

# Configuration setting to convert image to string.
configuration = ("-l eng --oem 1 --psm 8")

# This will recognize the text from the image of the bounding box.
text = pytesseract.image_to_string(r, config=configuration)

# Append bbox coordinate and associated text to the list of results.
results.append(((startX, startY, endX, endY), text))

orig_image = orig.copy()

# SECURITY(review): the API key is hard-coded in source; consider loading it
# from configuration or the environment instead.
# The client does not depend on the loop variable, so build it once rather
# than once per detected box.
translate = YandexTranslate(
    'trnsl.1.1.20200222T042110Z.10474b882cdf0dd2.08d7b3579f016b9d507aaa6e6a7ec116a445d121'
)

# Move over the results and draw each box with its translated text.
for ((start_X, start_Y, end_X, end_Y), text) in results:
    text = smart_unicode(text)
    res = translate.translate(text, 'fr-en')

    # Read the translation straight from the response instead of slicing
    # str(res) between the last "[" and "]": that approach depends on the
    # list repr (u'...' vs '...') and breaks when the text contains brackets.
    # Assumes the response is a mapping with a 'text' list -- TODO confirm.
    translated = res.get('text') or ['']
    text = translated[0]

    # Keep only ASCII characters in the text that is drawn.
    text = "".join(x for x in text if ord(x) < 128).strip()

    cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y),
                  (0, 255, 0), 2)
    cv2.putText(orig_image, text, (start_X, start_Y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
# Command line: <train_file> <w2vec_file>
train_file = sys.argv[1]
w2vec_file = sys.argv[2]
w_dic = load_from_w2vec(w2vec_file)

# Give every tag seen in the training file the next free integer id, in
# order of first appearance. Each non-blank line must hold exactly two
# whitespace-separated fields (word, tag); anything else raises ValueError.
with codecs.open(train_file, 'r', 'utf-8') as fin_train:
    for line in fin_train:
        line = line.strip()
        if not line:
            continue
        _word, t = line.split()
        if t not in t_dic:
            t_dic[t] = len(t_dic)

error = 0

# Write "<word> <id>" per line. Words are written unfiltered: empty or
# multi-token entries are not skipped.
with codecs.open(out_dir + '/words.int', 'w', 'utf-8') as fout_dic:
    for w, wid in w_dic.items():
        w = smart_unicode(w)
        fout_dic.write("%s %d\n" % (w, wid))

# Write "<tag> <id>" per line.
with codecs.open(out_dir + '/tags.int', 'w', 'utf-8') as fout_tag:
    for t, tid in t_dic.items():
        fout_tag.write("%s %d\n" % (t, tid))