def pickup(self):
    """Load the bonus page, recognise its captcha and submit the form.

    Repeats until the '#mess-exec:visible' query evaluates falsy after a
    submit. A captcha that fails to transform, or splits into a number of
    symbols other than ``self.symqty``, triggers a page reload and retry.
    """
    # Browser used to walk the site.
    browser = Browser()
    # Analyzer used to recognise the captcha.
    analyzer = Analyzer('wmtake', self.symsize, self.charset)
    analyzer.load()
    browser.show()

    log.debug('LOADING PAGE WITH WM BONUS')
    browser.get('http://fotocity.info/m.base/bonus.php')

    while True:
        log.debug('SAVING CAPTCHA')
        captcha = browser.js('$("#scode-pic img")[0].src')

        log.debug('CAPTCHA TRANSFORMING')
        try:
            tr = Transformer('orig', browser.image(captcha))
            tr.resizeby('resize', tr['orig'], 2, 2)
            tr.grayscale('grayscale', tr['resize'], 2)
            tr.binarize('binarize', tr['grayscale'], 150,
                        CV_THRESH_BINARY_INV)
            tr.contourSplit('breaksplit', tr['binarize'], 0.001)
            if len(tr.symbols) != self.symqty:
                raise Exception
        except Exception as e:
            log.debug(e)
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            log.debug('LOADING PAGE WITH WM BONUS')
            # NOTE(review): the retry reloads wmtake.ru while the first
            # load above uses fotocity.info -- confirm which is intended.
            browser.get('http://wmtake.ru/m.base/bonus.php')
            continue

        tr.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = tr.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = analyzer.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del tr
        print(code)

        log.debug('FILLING FIELDS')
        browser.js("$('#scode').val('%s')" % code)
        browser.js("$('#purse').val('R%s')" % self.purse)
        browser.js("$('div.news_box div.bn p').click()")
        browser.sleep(10)

        # NOTE(review): if js() returns a non-empty string such as '0',
        # this test can never pass -- confirm the return type.
        message_shown = browser.js("$('#mess-exec:visible').length")
        if not message_shown:
            log.debug('FINISH')
            break

        log.debug('INCORRECT CAPCTHA RECOGNITION')
        log.debug('LOADING PAGE WITH WM BONUS')
        browser.js("$('#mess-exec p').click()")
def pickup(self):
    """Load the wmtake.ru bonus page, recognise its captcha and submit.

    Repeats until the '#mess-exec:visible' query evaluates falsy after a
    submit. A captcha that fails to transform, or splits into a number of
    symbols other than ``self.symqty``, triggers a page reload and retry.
    """
    bonus_url = 'http://wmtake.ru/m.base/bonus.php'

    # Browser used to walk wmtake.ru.
    browser = Browser()
    # Analyzer used to recognise the captcha.
    analyzer = Analyzer(self.site, self.symsize, self.charset)
    analyzer.load()
    browser.show()

    log.debug('LOADING PAGE WITH WM BONUS')
    browser.get(bonus_url)

    while True:
        log.debug('SAVING CAPTCHA')
        captcha = browser.js('$("#scode-pic img")[0].src')

        log.debug('CAPTCHA TRANSFORMING')
        try:
            tr = Transformer('orig', browser.image(captcha))
            tr.resizeby('resize', tr['orig'], 2, 2)
            tr.grayscale('grayscale', tr['resize'], 2)
            tr.binarize('binarize', tr['grayscale'], 150,
                        CV_THRESH_BINARY_INV)
            tr.contourSplit('breaksplit', tr['binarize'], 0.001)
            if len(tr.symbols) != self.symqty:
                raise Exception
        except Exception as e:
            log.debug(e)
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            log.debug('LOADING PAGE WITH WM BONUS')
            browser.get(bonus_url)
            continue

        tr.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = tr.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = analyzer.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del tr
        print(code)

        log.debug('FILLING FIELDS')
        browser.js("$('#scode').val('%s')" % code)
        browser.js("$('#purse').val('R%s')" % self.purse)
        browser.js("$('div.news_box div.bn p').click()")
        browser.sleep(10)

        # NOTE(review): if js() returns a non-empty string such as '0',
        # this test can never pass -- confirm the return type.
        message_shown = browser.js("$('#mess-exec:visible').length")
        if not message_shown:
            log.debug('FINISH')
            break

        log.debug('INCORRECT CAPCTHA RECOGNITION')
        log.debug('LOADING PAGE WITH WM BONUS')
        browser.js("$('#mess-exec p').click()")
def pickup(self):
    """Load the exchangecity.ru bonus page, recognise its captcha and submit.

    Every iteration reloads the page. A captcha that does not split into
    ``self.symqty`` symbols restarts the iteration. The loop ends when the
    success-text query evaluates falsy, after which ``self.quit()`` runs.
    """
    # Browser used to walk the site.
    browser = Browser()
    # Analyzer used to recognise the captcha.
    analyzer = Analyzer(self.site, self.symsize, self.charset)
    analyzer.load()
    browser.show()

    while True:
        log.debug('LOADING PAGE WITH WM BONUS')
        browser.get('http://exchangecity.ru/?cmd=bonus')

        log.debug('SAVING CAPTCHA')
        captcha = 'http://exchangecity.ru/include/anti_robot.php'

        log.debug('CAPTCHA TRANSFORMING')
        tr = Transformer('orig', browser.image(captcha))
        tr.resizeby('resize', tr['orig'], 2, 2)
        tr.grayscale('grayscale', tr['resize'], 2)
        tr.binarize('binarize', tr['grayscale'], 200, CV_THRESH_BINARY_INV)
        tr.contourSplit('breaksplit', tr['binarize'], 0.001)

        if len(tr.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        tr.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = tr.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = analyzer.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del tr
        print(code)

        log.debug('FILLING FIELDS')
        browser.js("$('input[name = img]').val('%s')" % code)
        browser.js("$('input[name = WALLET_BONUS]').val('R%s')" % self.purse)
        browser.js("$('input[name = get_bonus]').click()")
        browser.sleep(1)

        # NOTE(review): the loop exits when the success-text query is
        # falsy, which reads as inverted -- confirm the intended condition.
        found = browser.js(
            "$('font.textbl:contains(Вы получили бонус в размере)').length")
        if not found:
            log.debug('FINISH')
            break

        log.debug('INCORRECT CAPCTHA RECOGNITION')

    self.quit()
def pickup(self):
    """Load the exchangecity.ru bonus page, recognise its captcha and submit.

    Every iteration reloads the page. A captcha that does not split into
    ``self.symqty`` symbols restarts the iteration. The loop ends when the
    success-text query evaluates falsy, after which ``self.quit()`` runs.
    """
    # Browser used to walk the site.
    browser = Browser()
    # Analyzer used to recognise the captcha.
    analyzer = Analyzer(self.site, self.symsize, self.charset)
    analyzer.load()
    browser.show()

    while True:
        log.debug('LOADING PAGE WITH WM BONUS')
        browser.get('http://exchangecity.ru/?cmd=bonus')

        log.debug('SAVING CAPTCHA')
        captcha = 'http://exchangecity.ru/include/anti_robot.php'

        log.debug('CAPTCHA TRANSFORMING')
        tr = Transformer('orig', browser.image(captcha))
        tr.resizeby('resize', tr['orig'], 2, 2)
        tr.grayscale('grayscale', tr['resize'], 2)
        tr.binarize('binarize', tr['grayscale'], 200, CV_THRESH_BINARY_INV)
        tr.contourSplit('breaksplit', tr['binarize'], 0.001)

        if len(tr.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        tr.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = tr.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = analyzer.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del tr
        print(code)

        log.debug('FILLING FIELDS')
        browser.js("$('input[name = img]').val('%s')" % code)
        browser.js("$('input[name = WALLET_BONUS]').val('R%s')" % self.purse)
        browser.js("$('input[name = get_bonus]').click()")
        browser.sleep(1)

        # NOTE(review): the loop exits when the success-text query is
        # falsy, which reads as inverted -- confirm the intended condition.
        found = browser.js(
            "$('font.textbl:contains(Вы получили бонус в размере)').length")
        if not found:
            log.debug('FINISH')
            break

        log.debug('INCORRECT CAPCTHA RECOGNITION')

    self.quit()
def train(self):
    """Train the network on the training file and save the result.

    Trains ``self.ann`` from ``self.trainpath`` using ``self.maxEpochs``,
    ``self.epochsBetweenReports`` and ``self.desiredError``, then saves the
    network to ``self.netpath``.
    """
    # The log tag used to read 'prepare()' (copied from prepare()).
    log.info('train(): trainpath: %s, netpath: %s' %
             colorize((self.trainpath, self.netpath)))
    self.ann.train_on_file(self.trainpath, self.maxEpochs,
                           self.epochsBetweenReports, self.desiredError)
    self.ann.save(self.netpath)
def prepare(self):
    """Build the training file from the symbol images of this site.

    Files in ``self.symbolpath`` whose names match ``SYMBOL_FILTER`` are
    used. The file starts with a header line "<count> <inputs> <outputs>";
    each symbol then contributes one line of pixel data (from
    ``self.getdata``) and one line of expected outputs, where the position
    of the file name's first character in ``self.charset`` is 1 and every
    other output is -1.

    Raises:
        ValueError: if ``self.charset.index`` does not find the first
            character of a file name.
    """
    log.info('prepare(): trainpath: %s, symbolpath: %s' %
             colorize((self.trainpath, self.symbolpath)))

    name_filter = re.compile(SYMBOL_FILTER)
    symbols = [name for name in os.listdir(self.symbolpath)
               if name_filter.match(name)]

    # 'with' closes the file even if loading or encoding a symbol fails.
    with open(self.trainpath, 'w') as trainfile:
        trainfile.write('%d %d %d\n' %
                        (len(symbols), self.input, self.output))
        for name in symbols:
            img = cvLoadImage(SYMBOLS_PATH + self.site + '/' + name,
                              CV_LOAD_IMAGE_GRAYSCALE)
            data = self.getdata(img)
            trainfile.write('%s\n' % ' '.join(map(str, data)))

            # The first character of the file name is the symbol's label.
            n = self.charset.index(name[0])
            trainfile.write('-1 ' * n + '1' +
                            ' -1' * (self.output - n - 1) + '\n')
def __init__(self, site, picsize, charset):
    """Set up paths and an untrained three-layer network for *site*.

    Args:
        site: site name; used to build the training, network and symbol
            paths.
        picsize: pair whose two items are multiplied to get the number of
            network inputs.
        charset: recognisable characters; its length is the number of
            network outputs.
    """
    self.site = site
    self.trainpath = TRAINS_PATH + self.site + '.trn'
    self.netpath = NETS_PATH + self.site + '.ann'
    self.symbolpath = SYMBOLS_PATH + self.site
    self.charset = charset

    self.input = picsize[0] * picsize[1]
    self.output = len(charset)
    # Floor division keeps the layer size an integer even under true
    # division (Python 3 / ``from __future__ import division``).
    self.hidden = self.input // 3
    self.layers = 3

    self.desiredError = 0.00006
    self.maxEpochs = 50000
    self.epochsBetweenReports = 1000

    self.ann = libfann.neural_net()
    # NOTE(review): the FANN Python binding appears to take a connection
    # rate as the first argument here, not a layer count -- confirm.
    self.ann.create_sparse_array(self.layers,
                                 (self.input, self.hidden, self.output))
    self.ann.set_activation_function_hidden(
        libfann.SIGMOID_SYMMETRIC_STEPWISE)
    self.ann.set_activation_function_output(
        libfann.SIGMOID_SYMMETRIC_STEPWISE)

    log.info(
        'init(): site: %s, size: %sx%s, charset: %s, input: %s, hidden: %s, output: %s'
        % colorize((site, picsize[0], picsize[1], charset, self.input,
                    self.hidden, self.output)))
def captcha(self, symbols):
    """Recognise each symbol image in turn and return the joined result."""
    recognised = (self.symbol(self.getdata(smb)) for smb in symbols)
    result = ''.join(recognised)
    log.info('captcha(): result: %s' % colorize(result))
    return result
def collect(self):
    """Download 200 captchas from sms-webmoney.ru and save their symbols.

    Each captcha image is saved under ``CAPTCHA_PATH + self.site``. Only
    captchas that split into exactly ``self.symqty`` symbols have their
    normalised symbols saved under ``SYMBOLS_PATH + self.site``.
    """
    browser = Browser()
    for i in xrange(200):
        log.info('LOAD PAGE WITH CAPTCHA')
        browser.get('http://sms-webmoney.ru/')

        captcha = 'http://sms-webmoney.ru/img.php'
        browser.save(captcha, CAPTCHA_PATH + self.site + '/%02d.png' % i)

        tr = Transformer()
        tr.load('orig', browser.image(captcha))
        tr.resizeby('resize', tr['orig'], 3, 3)
        tr.grayscale('grayscale', tr['resize'], 2)
        tr.binarize('binarize', tr['grayscale'], 200, CV_THRESH_BINARY_INV)

        # Elliptical structuring element of side 2 * radius + 1.
        radius = 2
        side = radius * 2 + 1
        kernel = cvCreateStructuringElementEx(side, side, radius, radius,
                                              CV_SHAPE_ELLIPSE)
        tr.morphology('morphology', tr['binarize'], 0, 1, kernel)
        tr.contourSplit('breaksplit', tr['morphology'], 0.01)

        if len(tr.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        tr.normolize('origsplit', 'breaksplit', self.symsize)
        tr.savesymbols('origsplit', SYMBOLS_PATH + self.site, '%02d' % i)
        del tr
def collect(self):
    """Download 100 captchas from exchangecity.ru and save their symbols.

    Each captcha image is saved under ``CAPTCHA_PATH + Implem.name``. Only
    captchas that split into exactly ``self.symqty`` symbols have their
    normalised symbols saved under ``SYMBOLS_PATH + Implem.name``.
    """
    browser = Browser()
    for i in xrange(100):
        log.info('LOAD PAGE WITH CAPTCHA')
        browser.get('http://exchangecity.ru/?cmd=bonus')

        captcha = 'http://exchangecity.ru/include/anti_robot.php'
        browser.save(captcha, CAPTCHA_PATH + Implem.name + '/%02d.png' % i)

        tr = Transformer()
        tr.load('orig', browser.image(captcha))
        tr.resizeby('resize', tr['orig'], 2, 2)
        tr.grayscale('grayscale', tr['resize'], 2)
        tr.binarize('binarize', tr['grayscale'], 200, CV_THRESH_BINARY_INV)

        # Disabled in the original (kept there as a no-op string): a
        # morphology pass with an elliptical kernel of radius 3.

        tr.contourSplit('breaksplit', tr['binarize'], 0.001)

        if len(tr.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        tr.normolize('origsplit', 'breaksplit', Implem.size)
        tr.savesymbols('origsplit', SYMBOLS_PATH + Implem.name, '%02d' % i)
        del tr
def slice(self, srckey):
    """Return the *srckey* entry of every symbol, in symbol order."""
    count = len(self.symbols)
    log.debug('slice(): srckey: %s, len: %s' % colorize((srckey, count)))
    return [smb[srckey] for smb in self.symbols]
def normolize(self, dstkey, srckey, normsize):
    """Resize every symbol's *srckey* image to *normsize*.

    Args:
        dstkey: key the resized image is stored under on each symbol.
        srckey: key of the source image in each symbol's transforms.
        normsize: pair passed to ``resizeto`` as (resultX, resultY).
    """
    log.debug('normolize(): dstkey: %s, srckey: %s, size: %sx%s' %
              colorize((dstkey, srckey, normsize[0], normsize[1])))
    # The original also read each image's size into locals it never used.
    for smb in self.symbols:
        smb.resizeto(dstkey, smb.transforms[srckey],
                     normsize[0], normsize[1])
def save(self, url, path):
    """Write the cached data for *url* to the file *path*.

    Args:
        url: address whose cache entry is read from ``self.cache``.
        path: destination file path.

    Returns:
        *path*, whether or not the file could be opened for writing.
    """
    log.info('save(): %s to %s' % (colorize((url, path))))
    target = QFile(path)  # not named 'file': that shadows the builtin
    cached = self.cache.data(QUrl(url)).readAll()
    if target.open(QIODevice.WriteOnly):
        try:
            target.write(cached)
        finally:
            target.close()
    else:
        # Previously a failed open was silent.
        log.info('save(): cannot open %s for writing' % colorize(path))
    return path
def savesymbols(self, srckey, smbdir, ser):
    """Save every symbol's *srckey* image into *smbdir*.

    Files are named ``_<index>_<ser>.png``, where index is the symbol's
    position in ``self.symbols``. The process working directory is switched
    to *smbdir* while saving and restored afterwards.
    """
    log.debug('saveSymbols(): %s' % colorize(srckey))
    savedir = os.getcwd()
    os.chdir(smbdir)
    try:
        for i, smb in enumerate(self.symbols):
            self.save(smb.transforms[srckey], '_%d_%s.png' % (i, ser))
    finally:
        # Restore the working directory even if a save fails.
        os.chdir(savedir)
def getdata(self, img):
    """Flatten *img* into a list of 0/1 values, column by column.

    A pixel contributes 1 when ``img[y, x] > 0`` and 0 otherwise; the outer
    iteration is over x (width), the inner over y (height).
    """
    log.debug('getdata(): size: %sx%s' % colorize((img.width, img.height)))
    # The original kept a disabled row-by-row debug dump here as a no-op
    # string.
    return [1 if img[y, x] > 0 else 0
            for x in range(img.width)
            for y in range(img.height)]
def js(self, script):
    """Evaluate *script* in the main frame and return its result string.

    After evaluating, waits up to 30 (waiter units) for ``loadStarted``.
    If a load starts, waits up to ``self.timeoutsec * 1000`` for
    ``loadFinished`` and then re-injects jQuery when ``self.autojq`` is set.
    """
    # Run the script.
    log.debug('js(): evalute %s' % colorize(script))
    frame = self.mainFrame()
    result = frame.evaluateJavaScript(script).toString()
    log.debug('js(): result %s' % colorize(result))

    # Wait briefly in case the script triggered a page load.
    load_started = self.waiter.wait(self.loadStarted, 30)
    if load_started:
        # A load began: wait a reasonable time for it to finish.
        log.debug('js(): loading start')
        load_finished = self.waiter.wait(self.loadFinished,
                                         self.timeoutsec * 1000)
        if load_finished:
            log.debug('js(): loading fifnish')
        else:
            log.debug('js(): loading timeout')

        # Inject jQuery if requested.
        # NOTE(review): the flattened source does not show whether this
        # step sits inside the load-started branch or runs after every
        # call -- confirm against the original indentation.
        if self.autojq:
            self.jquerify()

    return result
def morphology(self, key, src, method, iterations=1, kernel=None):
    """Apply a morphological operation to *src* and store it under *key*.

    Args:
        key: key in ``self.transforms`` for the result.
        src: source image.
        method: 0 calls cvErode, 1 calls cvDilate, 2..6 is passed to
            cvMorphologyEx as the operation; any other value stores nothing.
        iterations: iteration count passed to the OpenCV call.
        kernel: structuring element; a 3x3 ellipse is created when falsy.
    """
    log.debug('morphology(): %s' % colorize(key))
    tmp = cvCreateImage(cvGetSize(src), src.depth, src.nChannels)
    if not kernel:
        kernel = cvCreateStructuringElementEx(3, 3, 1, 1, CV_SHAPE_ELLIPSE)

    if method == 0 or method == 1:
        operation = cvErode if method == 0 else cvDilate
        operation(src, tmp, kernel, iterations)
        self.transforms[key] = tmp
    elif 2 <= method <= 6:
        # cvMorphologyEx gets tmp as a scratch image and its own output.
        res = cvCreateImage(cvGetSize(src), src.depth, src.nChannels)
        cvMorphologyEx(src, res, tmp, kernel, method, iterations)
        self.transforms[key] = res
def __init__(self, autojq=True, timeoutsec=120):
    """Create the page with its own disk cache and network manager.

    Args:
        autojq: stored as ``self.autojq``; read by ``js`` to decide whether
            to inject jQuery.
        timeoutsec: stored as ``self.timeoutsec``; used by ``js`` as the
            page-load timeout.
    """
    QWebPage.__init__(self)
    self.autojq = autojq
    self.timeoutsec = timeoutsec
    self.loop = QEventLoop()
    self.waiter = Waiter()

    # Patched cache and network access manager.
    cache = Cache()
    manager = Manager()
    self.cache = cache
    self.manager = manager
    cache.setCacheDirectory(CACHE_PATH)
    manager.setCache(cache)
    self.setNetworkAccessManager(manager)

    details = colorize((autojq, timeoutsec, CACHE_PATH))
    log.info('init(): autojq: %s, timeoutsec: %s, cache: %s' % details)
def __init__(self, site, picsize, charset):
    """Set up paths and an untrained three-layer network for *site*.

    Args:
        site: site name; used to build the training, network and symbol
            paths.
        picsize: pair whose two items are multiplied to get the number of
            network inputs.
        charset: recognisable characters; its length is the number of
            network outputs.
    """
    self.site = site
    self.trainpath = TRAINS_PATH + self.site + '.trn'
    self.netpath = NETS_PATH + self.site + '.ann'
    self.symbolpath = SYMBOLS_PATH + self.site
    self.charset = charset

    self.input = picsize[0] * picsize[1]
    self.output = len(charset)
    # Floor division keeps the layer size an integer even under true
    # division (Python 3 / ``from __future__ import division``).
    self.hidden = self.input // 3
    self.layers = 3

    self.desiredError = 0.00006
    self.maxEpochs = 50000
    self.epochsBetweenReports = 1000

    self.ann = libfann.neural_net()
    # NOTE(review): the FANN Python binding appears to take a connection
    # rate as the first argument here, not a layer count -- confirm.
    self.ann.create_sparse_array(self.layers,
                                 (self.input, self.hidden, self.output))
    self.ann.set_activation_function_hidden(
        libfann.SIGMOID_SYMMETRIC_STEPWISE)
    self.ann.set_activation_function_output(
        libfann.SIGMOID_SYMMETRIC_STEPWISE)

    log.info(
        'init(): site: %s, size: %sx%s, charset: %s, input: %s, hidden: %s, output: %s'
        % colorize((site, picsize[0], picsize[1], charset, self.input,
                    self.hidden, self.output)))
def prepare(self):
    """Build the training file from the symbol images of this site.

    Files in ``self.symbolpath`` whose names match ``SYMBOL_FILTER`` are
    used. The file starts with a header line "<count> <inputs> <outputs>";
    each symbol then contributes one line of pixel data (from
    ``self.getdata``) and one line of expected outputs, where the position
    of the file name's first character in ``self.charset`` is 1 and every
    other output is -1.

    Raises:
        ValueError: if ``self.charset.index`` does not find the first
            character of a file name.
    """
    log.info('prepare(): trainpath: %s, symbolpath: %s' %
             colorize((self.trainpath, self.symbolpath)))

    name_filter = re.compile(SYMBOL_FILTER)
    symbols = [name for name in os.listdir(self.symbolpath)
               if name_filter.match(name)]

    # 'with' closes the file even if loading or encoding a symbol fails.
    with open(self.trainpath, 'w') as trainfile:
        trainfile.write('%d %d %d\n' %
                        (len(symbols), self.input, self.output))
        for name in symbols:
            img = cvLoadImage(SYMBOLS_PATH + self.site + '/' + name,
                              CV_LOAD_IMAGE_GRAYSCALE)
            data = self.getdata(img)
            trainfile.write('%s\n' % ' '.join(map(str, data)))

            # The first character of the file name is the symbol's label.
            n = self.charset.index(name[0])
            trainfile.write('-1 ' * n + '1' +
                            ' -1' * (self.output - n - 1) + '\n')
def resizeby(self, key, src, scaleX, scaleY, method=1):
    """Scale *src* by the given factors and store the result under *key*.

    Args:
        key: key in ``self.transforms`` for the result.
        src: source image.
        scaleX: width multiplier.
        scaleY: height multiplier.
        method: interpolation argument passed to cvResize.
    """
    log.debug('resizeby(): %s' % colorize(key))
    # Round to whole pixels so fractional factors give a valid image size;
    # integer factors produce the same size as before.
    width = int(round(src.width * scaleX))
    height = int(round(src.height * scaleY))
    res = cvCreateImage((width, height), src.depth, src.nChannels)
    cvResize(src, res, method)
    self.transforms[key] = res
def resizeto(self, key, src, resultX, resultY, method=1):
    """Resize *src* to (resultX, resultY) and store the result under *key*.

    *method* is passed to cvResize as the interpolation argument.
    """
    log.debug('resizeto(): %s' % colorize(key))
    target_size = (resultX, resultY)
    resized = cvCreateImage(target_size, src.depth, src.nChannels)
    cvResize(src, resized, method)
    self.transforms[key] = resized
def grayscale(self, key, src, flags=0):
    """Convert *src* to a single-channel image stored under *key*.

    The result has the size and depth of *src*; *flags* is passed to
    cvConvertImage.
    """
    # The log tag used to be misspelled 'garyscale()'.
    log.debug('grayscale(): %s' % colorize(key))
    res = cvCreateImage(cvGetSize(src), src.depth, 1)
    cvConvertImage(src, res, flags)
    self.transforms[key] = res
def binarize(self, key, src, threshold, method):
    """Threshold *src* into an 8-bit single-channel image under *key*.

    Calls cvThreshold with *threshold*, a maximum value of 255 and *method*
    as the threshold type.
    """
    log.debug('binarize(): %s' % colorize(key))
    size = cvGetSize(src)
    binary = cvCreateImage(size, IPL_DEPTH_8U, 1)
    cvThreshold(src, binary, threshold, 255, method)
    self.transforms[key] = binary
def clone(self, key, src):
    """Store a copy of *src* (made with cvCloneImage) under *key*."""
    log.debug('clone(): %s' % colorize(key))
    copy = cvCloneImage(src)
    self.transforms[key] = copy
def train(self):
    """Train the network on the training file and save the result.

    Trains ``self.ann`` from ``self.trainpath`` using ``self.maxEpochs``,
    ``self.epochsBetweenReports`` and ``self.desiredError``, then saves the
    network to ``self.netpath``.
    """
    # The log tag used to read 'prepare()' (copied from prepare()).
    log.info('train(): trainpath: %s, netpath: %s' %
             colorize((self.trainpath, self.netpath)))
    self.ann.train_on_file(self.trainpath, self.maxEpochs,
                           self.epochsBetweenReports, self.desiredError)
    self.ann.save(self.netpath)
def load(self):
    """Load the network into ``self.ann`` from the file ``self.netpath``."""
    netpath = self.netpath
    self.ann.create_from_file(netpath)
    log.info('load(): netpath: %s' % colorize(netpath))
def slice(self, srckey):
    """Return the *srckey* entry of every symbol, in symbol order."""
    count = len(self.symbols)
    log.debug('slice(): srckey: %s, len: %s' % colorize((srckey, count)))
    return [smb[srckey] for smb in self.symbols]
def load(self, key, src):
    """Convert *src* with QIm2Ipl and store the result under *key*."""
    converted = QIm2Ipl(src)
    self.transforms[key] = converted
    log.debug('load(): %s' % colorize(key))
def save(self, src, path):
    """Write the image *src* to *path* with cvSaveImage."""
    message = 'save(): %s' % colorize(path)
    log.debug(message)
    cvSaveImage(path, src)
def open(self, key, path):
    """Load the image file *path* with cvLoadImage and store it under *key*."""
    # colorize() needs the values as one tuple: the old call passed *path*
    # as its second (colour) argument, leaving one value for two '%s'.
    log.debug('open(): %s from %s' % colorize((key, path)))
    self.transforms[key] = cvLoadImage(path)
def get(self, url, headers=None, pull=None):
    """Navigate to *url* by assigning ``window.location`` through ``js``.

    *headers* and *pull* are accepted but not used by this method.
    """
    log.info('get(): url: %s' % colorize(url))
    # Go to the url.
    navigate = 'window.location = "%s"' % url
    self.js(navigate)
def breakSplit(self, key, src, threshold=0):
    """Split *src* into symbols at empty-column gaps.

    A column counts as occupied when its share of pixels > 0 exceeds
    *threshold*. Transitions between empty and occupied columns give the
    left/right bounds of each symbol. The bounds are drawn on a clone of
    *src* stored as ``self.transforms[key]``. For each bounded strip the
    contour with the largest area is cropped to its bounding rectangle and
    appended to ``self.symbols`` as a new ``Transformer``.

    Raises:
        TransformError: if a strip contains no contours.
    """
    log.debug('breakSplit(): %s' % colorize(key))
    storage = cvCreateMemStorage(0)
    res = cvCloneImage(src)

    # Column indices where the image switches between empty and occupied.
    leps = []
    # Assume empty space to the left (harmless if that is not the case).
    prev = 0
    for col in xrange(src.cols):
        # Count the non-zero pixels of this column.
        qty = 0.0
        for row in xrange(src.rows):
            qty += 1 if src[row][col] > 0 else 0
        # The column holds part of a symbol if its share exceeds threshold.
        curr = 1 if qty / src.rows > threshold else 0
        if prev != curr:
            leps.append(col)
        prev = curr

    # Close a symbol that runs to the right edge.
    if len(leps) % 2 != 0:
        leps.append(src.cols - 1)

    # (left, right) bounds of every symbol.
    boundPairs = [(leps[i], leps[i + 1]) for i in xrange(0, len(leps), 2)]

    # Draw the frames on the debug image.
    for left, right in boundPairs:
        cvRectangle(res, (left, 0), (right, src.rows - 1), gray)
    self.transforms[key] = res

    # Cut out the symbols.
    for left, right in boundPairs:
        rect = (left, 0, right - left, src.rows)
        roi = cvGetSubRect(src, rect)
        # Two copies: one is handed to cvFindContours, the other is
        # cropped afterwards.
        tmp1 = cvCreateImage(cvGetSize(roi), IPL_DEPTH_8U, 1)
        tmp2 = cvCreateImage(cvGetSize(roi), IPL_DEPTH_8U, 1)
        cvCopy(roi, tmp1)
        cvCopy(roi, tmp2)

        num, contours = cvFindContours(tmp1, storage, sizeof_CvContour,
                                       CV_RETR_EXTERNAL,
                                       CV_CHAIN_APPROX_SIMPLE, (0, 0))
        if num == 0:
            raise TransformError

        # Pick the largest contour. The selection is reset for every strip
        # and always takes the first contour, so it can no longer be
        # unbound, or left over from the previous strip, when no contour
        # has an area above zero.
        saveCont = None
        saveArea = None
        for contour in contours.hrange():
            currArea = cvContourArea(contour)
            if saveCont is None or currArea > saveArea:
                saveArea = currArea
                saveCont = contour

        # Crop the strip to that contour's bounding rectangle.
        rect = cvBoundingRect(saveCont)
        roi = cvGetSubRect(tmp2, rect)
        smb = cvCreateImage(cvGetSize(roi), IPL_DEPTH_8U, 1)
        cvCopy(roi, smb)
        self.symbols.append(Transformer(key, Ipl2QIm(smb)))
def image(self, url):
    """Return a QImage loaded from the cache entry for *url*."""
    log.info('image(): from %s' % colorize(url))
    cached = self.cache.data(QUrl(url))
    raw = cached.readAll()
    picture = QImage()
    picture.loadFromData(raw)
    return picture
def jquerify(self):
    """Read the jQuery source at JQUERY_PATH and evaluate it in the main frame."""
    log.debug('jquerify(): load from %s' % colorize(JQUERY_PATH))
    # 'with' closes the file handle, which was previously left open.
    with open(JQUERY_PATH) as source:
        jquery = source.read()
    self.mainFrame().evaluateJavaScript(jquery)
def save(self, src, path):
    """Write the image *src* to *path* with cvSaveImage."""
    message = 'save(): %s' % colorize(path)
    log.debug(message)
    cvSaveImage(path, src)