def new_log(request, fountain_name):
    """Validate form data and attempt to create a new Log for a fountain.

    Renders the shared messages partial so the caller can display any
    success/error messages that were queued here.
    """
    fountain = Fountain.objects.get(url_name__iexact=fountain_name)
    form = NewLogForm(request.POST)
    # NOTE: Python 2 ``print`` statements removed -- they were debug noise
    # and are syntax errors under Python 3.
    if not form.is_valid():
        messages.error(request, 'Form data was not valid.')
    else:
        # Logs must not go backwards in bottle count or date relative to
        # the most recent log for this fountain.
        last = Log.objects.filter(fountain=fountain).order_by('bottles').last()
        log = Log(fountain=fountain,
                  bottles=form.cleaned_data['bottles'],
                  date=form.cleaned_data['date'],
                  time=form.cleaned_data['time'])
        if last and (last.bottles > log.bottles or last.date > log.date):
            messages.error(request, 'Bottles or date out of range.')
        else:
            try:
                # full_clean() raises ValidationError too, so it belongs
                # inside the try (it previously propagated uncaught).
                log.full_clean()
                log.save()
            except ValidationError:
                messages.error(request, 'Failed to create log.')
            else:
                # Only announce the new log when it was actually saved;
                # previously the signal and success message fired even
                # when save() failed.
                signals.new_log_signal.send(sender=log, bottles=log.bottles)
                messages.success(request, 'New log added.')
    template = loader.get_template('logger/_messages.html')
    context = RequestContext(request)
    return HttpResponse(template.render(context))
def consume(self):
    """Scan the consumption directory and process every eligible document."""
    for entry in os.listdir(self.CONSUME):
        path = os.path.join(self.CONSUME, entry)

        # Guard clauses: only real, title-matching, new, settled files
        # get consumed.
        if not os.path.isfile(path):
            continue
        if not re.match(self.REGEX_TITLE, path):
            continue
        if path in self._ignore:
            continue
        if self._is_ready(path):
            continue

        Log.info("Consuming {}".format(path), Log.COMPONENT_CONSUMER)

        # Each document gets its own scratch directory for page images.
        tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
        pngs = self._get_greyscale(tempdir, path)

        try:
            text = self._get_ocr(pngs)
            self._store(text, path)
        except OCRError:
            # Remember the failure so we do not retry it on the next pass.
            self._ignore.append(path)
            Log.error(
                "OCR FAILURE: {}".format(path), Log.COMPONENT_CONSUMER)
            self._cleanup_tempdir(tempdir)
            continue
        else:
            self._cleanup_tempdir(tempdir)
            self._cleanup_doc(path)
def consume(self):
    """Scan the consumption directory, OCR each eligible file, store it."""
    for entry in os.listdir(self.CONSUME):
        path = os.path.join(self.CONSUME, entry)

        # Skip anything that is not a processable, new, settled file.
        if not os.path.isfile(path):
            continue
        if not re.match(self.REGEX_TITLE, path):
            continue
        if path in self._ignore:
            continue
        if self._is_ready(path):
            continue

        Log.info("Consuming {}".format(path), Log.COMPONENT_CONSUMER)

        pngs = self._get_greyscale(path)

        try:
            text = self._get_ocr(pngs)
        except OCRError:
            # Blacklist the file for this run and move on.
            self._ignore.append(path)
            Log.error("OCR FAILURE: {}".format(path), Log.COMPONENT_CONSUMER)
            continue

        self._store(text, path)
        self._cleanup(pngs, path)
def pull(self):
    """
    Fetch all available mail at the target address and store it locally
    in the consumption directory so that the file consumer can pick it
    up and do its thing.
    """
    if not self._enabled:
        return

    Log.info("Checking mail", Log.COMPONENT_MAIL)

    for message in self._get_messages():
        Log.debug(
            'Storing email: "{}"'.format(message.subject),
            Log.COMPONENT_MAIL
        )

        # Stamp the stored file with the email's own timestamp so the
        # consumer sees the original document date.
        stamp = int(time.mktime(message.time.timetuple()))
        target = os.path.join(Consumer.CONSUME, message.file_name)
        with open(target, "wb") as handle:
            handle.write(message.attachment.data)
        os.utime(target, times=(stamp, stamp))

    self.last_checked = datetime.datetime.now()
def _cleanup(self, tempdir, doc):
    """Delete the scratch directory (recursively) and the source document."""
    for label, target, remove in (
            ("directory", tempdir, shutil.rmtree),
            ("document", doc, os.unlink)):
        Log.debug(
            "Deleting {} {}".format(label, target), Log.COMPONENT_CONSUMER)
        remove(target)
def _cleanup(self, pngs, doc):
    """Delete the generated page images and the original document."""
    # All page images for one document share a numeric prefix; turn the
    # first page's name into a glob that matches every page.
    prefix_glob = re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0])
    pattern = os.path.join(self.SCRATCH, prefix_glob)

    targets = list(glob.glob(pattern)) + [doc]
    for target in targets:
        Log.debug("Deleting {}".format(target), Log.COMPONENT_CONSUMER)
        os.unlink(target)
def _guess_language(text):
    """Return the language code langdetect guesses for ``text``.

    Returns None (implicitly) when detection raises.
    """
    try:
        guess = langdetect.detect(text)
        Log.debug(
            "Language detected: {}".format(guess),
            Log.COMPONENT_CONSUMER
        )
        return guess
    except Exception as e:
        # Bug fix: this is consumer code -- the warning was previously
        # mis-filed under Log.COMPONENT_MAIL, unlike every other message
        # in this helper family.
        Log.warning(
            "Language detection error: {}".format(e), Log.COMPONENT_CONSUMER)
def read(self, request, expression):
    """Parse ``account=..&log=..&agent=..`` and persist a new Log entry."""
    account_part, log_part, agent_part = expression.split('&')
    _, account_id = account_part.split('=')
    _, log_text = log_part.split('=')
    _, agent_id = agent_part.split('=')

    entry = Log(
        log_dt=datetime.now(),
        account=account_id,
        agent=agent_id,
        text=log_text,
    )
    entry.save()

    return HttpResponseRedirect(
        '/tools/account/%s/%s/' % (account_id, agent_id))
def record_log(self, imei, raw, type, send_time=None, **kwargs):
    """Persist a Log row for a raw message received from device ``imei``.

    For 'ack' messages, link the log to the SMS it acknowledges when one
    can be found by its send time.
    """
    device, _created = Device.objects.get_or_create(imei=imei)
    cause = None

    if type == 'ack':
        # Acknowledgement, so try and find the SMS which this refers to.
        try:
            cause = SMS.objects.get(device=device, send_time=send_time)
        except SMS.DoesNotExist:
            cause = None

    if cause is not None:
        entry = Log(device=device, received_date_time=datetime.now(),
                    message=raw, cause=cause)
    else:
        entry = Log(device=device, received_date_time=datetime.now(),
                    message=raw)
    entry.save()
    return entry
def __init__(self, data, verbosity=1):
    """
    Cribbed heavily from
    https://www.ianlewis.org/en/parsing-email-attachments-python
    """

    self.verbosity = verbosity

    # Filled in below from the parsed message.
    self.subject = None
    self.time = None
    self.attachment = None

    # Parse the raw bytes with the modern email policy (needed for
    # message.get_body() below).
    message = BytesParser(policy=policy.default).parsebytes(data)

    self.subject = str(message["Subject"]).replace("\r\n", "")
    self.body = str(message.get_body())

    # Validate subject/body before doing any further work -- presumably
    # these raise on invalid input (confirm in check_subject/check_body).
    self.check_subject()
    self.check_body()

    self._set_time(message)

    Log.info(
        'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)

    attachments = []
    for part in message.walk():
        content_disposition = part.get("Content-Disposition")
        if not content_disposition:
            continue

        dispositions = content_disposition.strip().split(";")
        if not dispositions[0].lower() == "attachment":
            continue

        # Payload is base64-encoded at this point; decode before wrapping.
        file_data = part.get_payload()

        attachments.append(Attachment(
            b64decode(file_data), content_type=part.get_content_type()))

    # Exactly one attachment is required for automatic indexing.
    if len(attachments) == 0:
        raise InvalidMessageError(
            "There don't appear to be any attachments to this message")

    if len(attachments) > 1:
        raise InvalidMessageError(
            "There's more than one attachment to this message. It cannot "
            "be indexed automatically."
        )

    self.attachment = attachments[0]
def parse(self, filename):
    """
    Parses all available information from the given filename.

    Returns a dict of captured variables (start/end datetimes plus their
    derived date/time components), or None when parsing fails.
    """
    def _components(dt, prefix):
        # Expand a datetime into the <prefix>{Datetime,Date,Year,...} keys.
        return {
            prefix + 'Datetime': dt,
            prefix + 'Date': dt.date(),
            prefix + 'Year': dt.strftime('%Y'),
            prefix + 'Month': dt.strftime('%m'),
            prefix + 'Day': dt.strftime('%d'),
            prefix + 'Time': dt.time(),
            prefix + 'Hour': dt.strftime('%H'),
            prefix + 'Minute': dt.strftime('%M'),
            prefix + 'Second': dt.strftime('%S'),
        }

    filename = os.path.split(filename)[1]
    try:
        fmt = self.naming_scheme
        # Build a regex by substituting each tag's pattern into the scheme.
        pattern = fmt
        for tag in tags:
            pattern = pattern.replace(tag.tag, tag.re)
        fields = re.search(pattern, filename).groupdict()

        # Let every tag present in the scheme derive its related fields.
        for tag in [tag for tag in tags if tag.tag in fmt]:
            fields = tag.relations(fields)

        # Derive whichever endpoint is missing from the stream's timespan.
        if 'startDatetime' not in fields and 'endDatetime' in fields:
            fields.update(_components(
                fields['endDatetime'] - timedelta(self.timespan), 'start'))
        elif 'startDatetime' in fields and 'endDatetime' not in fields:
            fields.update(_components(
                fields['startDatetime'] + timedelta(self.timespan), 'end'))

        return fields
    except Exception:
        # Was a bare ``except:``; keep the catch-all logging behaviour but
        # stop swallowing SystemExit/KeyboardInterrupt.
        Log.error('analysis.models.DataStream.parse', """
            An error occurred while attempting to parse filename
            {}
            using naming convention
            {}
            Check that the naming convention for this data stream has not
            been changed at the source.
            """.format(filename, self.naming_scheme))
def _fetch(self):
    """Yield parsed messages from the inbox, flagging parsed ones deleted."""
    message_ids = self._connection.search(None, "ALL")[1][0].split()
    for num in message_ids:
        __, data = self._connection.fetch(num, "(RFC822)")

        message = None
        try:
            message = Message(data[0][1], self.verbosity)
        except InvalidMessageError as e:
            Log.error(e, Log.COMPONENT_MAIL)
        else:
            # Only mail that parsed cleanly is marked for deletion.
            self._connection.store(num, "+FLAGS", "\\Deleted")

        if message:
            yield message
def _get_greyscale(self, tempdir, doc):
    """Render ``doc`` into per-page greyscale images inside ``tempdir``."""
    Log.debug(
        "Generating greyscale image from {}".format(doc),
        Log.COMPONENT_CONSUMER
    )

    output_template = os.path.join(tempdir, "convert-%04d.jpg")
    subprocess.Popen((
        self.CONVERT, "-density", "300", "-depth", "8",
        "-type", "grayscale", doc, output_template
    )).wait()

    # Collect whatever ImageMagick produced, sorted into page order.
    produced = (
        os.path.join(tempdir, name)
        for name in os.listdir(tempdir)
        if name.startswith("convert")
    )
    return sorted(p for p in produced if os.path.isfile(p))
def _get_greyscale(self, doc):
    """Render ``doc`` into greyscale page images in the scratch directory."""
    Log.debug(
        "Generating greyscale image from {}".format(doc),
        Log.COMPONENT_CONSUMER
    )

    # A random numeric prefix keeps this document's pages distinguishable
    # from other documents sharing the scratch directory.
    prefix = random.randint(1000000, 9999999)
    target = os.path.join(self.SCRATCH, "{}.png".format(prefix))

    subprocess.Popen((
        self.CONVERT, "-density", "300", "-depth", "8",
        "-type", "grayscale", doc, target
    )).wait()

    return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(prefix))))
def _ocr(self, pngs, lang):
    """
    Performs a single OCR attempt.
    """
    if not pngs:
        return ""

    Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER)

    # OCR every page in parallel, pairing each image with the language.
    with Pool(processes=self.THREADS) as pool:
        pages = pool.map(
            self.image_to_string, itertools.product(pngs, [lang]))

    # Strip out excess white space to allow matching to go smoother
    return re.sub(r"\s+", " ", " ".join(pages))
def addLog(request):
    """Create a Log entry for the current user from POSTed form fields."""
    date = datetime.strptime(request.POST['date'], '%d.%m.%Y')

    implicit = request.POST['is_implicit'] == 'true'
    # Implicit logs carry no fallacy data; explicit ones ship it as JSON.
    fallacy_data = {} if implicit else json.loads(request.POST['fallacies'])

    Log(
        user=request.user,
        date=date,
        is_implicit=implicit,
        fallacies=fallacy_data,
        theme=request.POST['theme'],
        notes=request.POST['notes'],
    ).save()

    return HttpResponse(1)
def fetch_data_ftp(self):
    """Collect the names of matching data files from the configured FTP host.

    Ensures the local target directory exists; files already present
    locally are skipped unless ``self.overwrite`` is set.
    """
    from ftplib import FTP, error_perm
    import os

    # SECURITY: eval() on stored configuration strings executes arbitrary
    # code -- these fields must only ever be editable by trusted admins.
    host_directory = eval(self.host_directory)
    test = eval(self.file_test)
    client_subdirectory = eval(self.client_subdirectory)

    target = os.path.join(settings.STATIC_ROOT, 'data',
                          self.client_directory, client_subdirectory())

    ftp = FTP(host=self.host)
    ftp.login(user=self.user, passwd=self.password)

    try:
        ftp.cwd(host_directory())
    except error_perm as e:
        # Bug fix: was ``except ftp_lib.error_perm`` (NameError -- only FTP
        # was imported) and checked ``e.errno`` (ftplib errors carry no
        # errno). Inspect the FTP 550 reply code in the message instead.
        if str(e).startswith('550'):
            Log.error('atmospherics.data.models.FTPSource.fetch_data_ftp', """
                An error occurred while accessing the directory {} on {}.
                Try checking the host server to ensure that their naming
                and filing scheme has not changed. The code used to
                generate this directory: {}
                """.format(host_directory(), self.host, self.host_directory))
        else:
            raise

    data = []
    for datafile in ftp.nlst():
        if not test(datafile):
            continue
        # Skip files we already have unless overwriting is requested.
        if os.path.exists(os.path.join(target, datafile)) and not self.overwrite:
            continue
        data.append(datafile)

    if not os.path.exists(target):
        try:
            os.makedirs(target)
        except OSError as e:
            # Bug fix: was Python 2 ``except OSError, e`` syntax.
            # errno 17 == EEXIST: directory appeared concurrently; fine.
            if e.errno != 17:
                raise
def _get_messages(self):
    """Connect, collect every parseable message, then tidy up the session.

    Any failure is logged and whatever was collected so far is returned.
    """
    collected = []
    try:
        self._connect()
        self._login()

        for message in self._fetch():
            if message:
                collected.append(message)

        # Purge deleted mail and shut the session down cleanly.
        self._connection.expunge()
        self._connection.close()
        self._connection.logout()
    except Exception as e:
        Log.error(e, Log.COMPONENT_MAIL)

    return collected
def fetch_data_math(self):
    """Run this source's Mathematica code on the remote host and fetch output.

    Writes the generated package to a remote temp dir over SSH, runs it
    headlessly under xvfb, then downloads the results into STATIC_ROOT.

    NOTE(review): the remote temp-dir removal is commented out below, so
    temp dirs accumulate on the host -- confirm whether that is intended.
    """
    ssh = pysftp.Connection(settings.SSH_HOST, username=settings.SSH_USER,
                            password=settings.SSH_PASSWORD)

    # Remote scratch directory for the package and its output.
    temp = ssh.execute('mktemp -d')[0].rstrip('\n')
    ssh.chdir(temp)

    code, matches = self.replace_exports(temp)
    Log.debug('atmospherics.data.models.MathematicaSource.fetch_data', code)

    # Shell-quote embedded single quotes so the echo '...' below survives.
    code = code.replace("'", '\'"\'"\'')
    command = "echo '{}' > {}/package.m".format(code, temp)
    ssh.execute(command)

    # Run headlessly; the trailing newlines push the job to the background.
    ret = ssh.execute('xvfb-run -s "-screen 0 640x480x24" math -script {}/package.m &\n\n\n\n'.format(temp))
    if ret:
        message = 'A message was returned by mathematica script {}.m:\n{}'.format(self.name, ret[-100:])
        Log.info('atmospherics.data.models.MathematicaSource.fetch_data', message)

    ssh.execute('rm {}'.format(os.path.join(temp, 'package.m')))

    # SECURITY: eval() on a stored configuration string -- trusted-admin
    # input only.
    client_subdirectory = eval(self.client_subdirectory)
    target = os.path.join(settings.STATIC_ROOT, 'data', self.client_directory,
                          client_subdirectory())
    if not os.path.exists(target):
        os.makedirs(target)

    ssh.get_d(temp, target)

    #ssh.execute('rm -rf {}'.format(temp))
    ssh.execute('disown')
    ssh.close()

    message = 'MathematicaSource {} run.\nOutput saved to:\nhttp://atmospherics.lossofgenerality.com/{}'.format(self.name, target)
    Log.info('atmospherics.data.models.MathematicaSource.fetch_data', message)
def _store(self, text, doc):
    """Persist an OCR'd document and any matching tags to the database."""
    sender, title, tags, file_type = self._guess_attributes_from_name(doc)
    tags = list(tags)

    # Union of filename-derived tags and tags whose patterns match the text.
    lower_text = text.lower()
    relevant_tags = set(
        [t for t in Tag.objects.all() if t.matches(lower_text)] + tags)

    stats = os.stat(doc)
    file_time = timezone.make_aware(
        datetime.datetime.fromtimestamp(stats.st_mtime))

    Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)

    document = Document.objects.create(
        sender=sender,
        title=title,
        content=text,
        file_type=file_type,
        created=file_time,
        modified=file_time,
    )

    if relevant_tags:
        tag_names = ", ".join([t.slug for t in relevant_tags])
        Log.debug(
            "Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
        document.tags.add(*relevant_tags)

    # Encrypt the original file into the document's storage path.
    with open(doc, "rb") as unencrypted:
        with open(document.source_path, "wb") as encrypted:
            Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
            encrypted.write(GnuPG.encrypted(unencrypted))
def _store(self, text, doc):
    """Create the Document record, tag it, and write the encrypted source."""
    sender, title, file_tags, file_type = self._guess_attributes_from_name(doc)

    # Combine text-matched tags with those derived from the filename.
    relevant_tags = set(list(Tag.match_all(text)) + list(file_tags))

    stats = os.stat(doc)
    timestamp = timezone.make_aware(
        datetime.datetime.fromtimestamp(stats.st_mtime))

    Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)

    document = Document.objects.create(
        sender=sender, title=title, content=text, file_type=file_type,
        created=timestamp, modified=timestamp)

    if relevant_tags:
        tag_names = ", ".join([t.slug for t in relevant_tags])
        Log.debug(
            "Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
        document.tags.add(*relevant_tags)

    # Encrypt the original file into the document's storage path.
    with open(doc, "rb") as unencrypted:
        with open(document.source_path, "wb") as encrypted:
            Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
            encrypted.write(GnuPG.encrypted(unencrypted))
def _get_ocr(self, pngs):
    """
    Attempts to do the best job possible OCR'ing the document based on
    simple language detection trial & error.

    Raises OCRError when there are no pages, or when detection / OCR
    fails and FORGIVING_OCR is disabled.
    """
    if not pngs:
        raise OCRError

    Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)

    # Since the division gets rounded down by int, this calculation works
    # for every edge-case, i.e. 1
    middle = int(len(pngs) / 2)

    # OCR a single middle page in the default language to get a text
    # sample for language detection.
    raw_text = self._ocr([pngs[middle]], self.DEFAULT_OCR_LANGUAGE)
    guessed_language = self._guess_language(raw_text)

    if not guessed_language or guessed_language not in ISO639:
        Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
        if settings.FORGIVING_OCR:
            Log.warning(
                "As FORGIVING_OCR is enabled, we're going to make the "
                "best with what we have.",
                Log.COMPONENT_CONSUMER
            )
            # Fall back to OCR'ing the remaining pages in the default
            # language and stitching them onto the sample.
            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
            return raw_text
        raise OCRError

    if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
        # The sample is already in the right language; just OCR the rest.
        raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
        return raw_text

    try:
        # Re-OCR the whole document in the detected language.
        return self._ocr(pngs, ISO639[guessed_language])
    except pyocr.pyocr.tesseract.TesseractError:
        if settings.FORGIVING_OCR:
            Log.warning(
                "OCR for {} failed, but we're going to stick with what "
                "we've got since FORGIVING_OCR is enabled.".format(
                    guessed_language
                ),
                Log.COMPONENT_CONSUMER
            )
            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
            return raw_text
        raise OCRError
def _get_ocr(self, pngs):
    """
    Attempts to do the best job possible OCR'ing the document based on
    simple language detection trial & error.

    Raises OCRError when there are no pages, or when detection / OCR
    fails and FORGIVING_OCR is disabled.
    """
    if not pngs:
        raise OCRError

    Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)

    # Since the division gets rounded down by int, this calculation works
    # for every edge-case, i.e. 1
    middle = int(len(pngs) / 2)

    # OCR a single middle page in the default language to get a text
    # sample for language detection.
    raw_text = self._ocr([pngs[middle]], self.DEFAULT_OCR_LANGUAGE)
    guessed_language = self._guess_language(raw_text)

    if not guessed_language or guessed_language not in ISO639:
        Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
        if settings.FORGIVING_OCR:
            Log.warning(
                "As FORGIVING_OCR is enabled, we're going to make the best "
                "with what we have.",
                Log.COMPONENT_CONSUMER
            )
            # Fall back to OCR'ing the remaining pages in the default
            # language and stitching them onto the sample.
            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
            return raw_text
        raise OCRError

    if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
        # The sample is already in the right language; just OCR the rest.
        raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
        return raw_text

    try:
        # Re-OCR the whole document in the detected language.
        return self._ocr(pngs, ISO639[guessed_language])
    except pyocr.pyocr.tesseract.TesseractError:
        if settings.FORGIVING_OCR:
            Log.warning(
                "OCR for {} failed, but we're going to stick with what "
                "we've got since FORGIVING_OCR is enabled.".format(
                    guessed_language
                ),
                Log.COMPONENT_CONSUMER
            )
            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
            return raw_text
        raise OCRError
def uploader(request):
    """Handle the log-file upload form: parse each line into a Log row."""
    if request.method == 'POST':
        # Form was submitted: bind it to the POSTed data and files.
        form = UploaderForm(request.POST, request.FILES)
        if form.is_valid():
            file_data = form['logfile']
            for line in file_data.value().readlines():
                parsed = parsear(line)
                # Only fully parsed (date, type, message) lines are stored.
                if len(parsed) == 3:
                    fecha, tipo, mensaje = parsed
                    fecha = datetime.strptime(fecha, '%a %b %d %H:%M:%S %Y')
                    Log(fecha=fecha, tipo=tipo, mensaje=mensaje).save()
            return HttpResponseRedirect('/')  # Redirect after POST
    else:
        # First visit: render an unbound form.
        form = UploaderForm()

    # CSRF token must accompany the rendered form.
    context = {'form': form}
    context.update(csrf(request))
    return render_to_response('uploader.html', RequestContext(request, context))
def pull(self):
    """
    Fetch all available mail at the target address and store it locally
    in the consumption directory so that the file consumer can pick it
    up and do its thing.
    """
    if self._enabled:
        Log.info("Checking mail", Log.COMPONENT_MAIL)
        for msg in self._get_messages():
            Log.debug('Storing email: "{}"'.format(msg.subject),
                      Log.COMPONENT_MAIL)

            # Mirror the message's own timestamp onto the stored file so
            # the consumer sees the original document date.
            epoch = int(time.mktime(msg.time.timetuple()))
            destination = os.path.join(Consumer.CONSUME, msg.file_name)

            with open(destination, "wb") as stored:
                stored.write(msg.attachment.data)
            os.utime(destination, times=(epoch, epoch))

        self.last_checked = datetime.datetime.now()
def mathematica_session(math_session, extra_args, user):
    """
    Runs the given Math session on the Thorek01 server.
    """
    ssh = pysftp.Connection(settings.SSH_HOST, username=settings.SSH_USER,
                            password=settings.SSH_PASSWORD)

    # Only one Mathematica instance may run at a time: if the kernel or
    # front end is already up, retry every 5 minutes (up to 24 hours).
    if ssh.execute('pidof MathKernel') or ssh.execute('pidof Mathematica'):
        ssh.close()
        mathematica_session.retry(countdown=5*60, max_retries=(60/5)*24)

    # Remote scratch directory for the generated package and its output.
    temp = ssh.execute('mktemp -d')[0].rstrip('\n')
    ssh.chdir(temp)

    # Substitute export paths and caller-supplied arguments into the code.
    code, matches = math_session.replace_exports(temp)
    code = code.replace('{extra_args}', extra_args['extra_args'])
    code = code.replace('{data}', extra_args['data'])
    Log.debug('atmospherics.analysis.tasks.mathematica_session', code)

    # Shell-quote embedded single quotes so the echo '...' below survives.
    code = code.replace("'", '\'"\'"\'')
    command = "echo '{}' > {}/package.m".format(code, temp)
    ssh.execute(command)

    # Run headlessly; the trailing newlines push the job to the background.
    ret = ssh.execute('xvfb-run -s "-screen 0 640x480x24" math -script {}/package.m &\n\n\n\n'.format(temp))

    # Local output directory named after the session plus a timestamp.
    target = os.path.join(settings.MEDIA_ROOT, user.username, 'output',
                          math_session.name.replace(' ', '_') + datetime.now().strftime('_%m%d%y_%H%M'))
    os.makedirs(target)
    ssh.get_d(temp, target)

    if ret:
        message = '''
        A message was returned by mathematica script {}.m:\n
        (trimmed to contain only the last 100 lines)
        \n\n
        {}
        '''.format(math_session.name, '\n'.join(ret[-100:]))
        Log.info('atmospherics.analysis.tasks.mathematica_session', message)
        with open(os.path.join(target, 'response.txt'), 'w') as logfile:
            logfile.write(message)

    # NOTE(review): remote temp dir is never removed -- confirm intended.
    #ssh.execute('rm -rf {}'.format(temp))
    ssh.execute('disown')
    ssh.close()

    message = 'Mathematica session {} run.\nOutput saved to:\nhttp://atmospherics.lossofgenerality.com/{}'.format(math_session.name, os.path.join('output', user.username, os.path.split(target)[1]))
    Log.info('atmospherics.analysis.tasks.mathematica_session', message)

    # NOTE(review): hasattr(user, 'email') is effectively always true for
    # Django users; the sibling task checks ``user.email`` truthiness
    # instead -- confirm which is intended.
    if hasattr(user, 'email'):
        subject = 'Atmospherics Mathematica {} Complete'.format(math_session.name)
        from_email = 'Atmospherics<*****@*****.**>'
        email = EmailMultiAlternatives(subject, message, from_email, [user.email])
        email.send()
def mathematica_package(math_package, extra_args, user):
    """
    Runs the given Math package on the Thorek01 server.
    """
    ssh = pysftp.Connection(settings.SSH_HOST, username=settings.SSH_USER,
                            password=settings.SSH_PASSWORD)

    # Remote scratch directory for the generated package and its output.
    temp = ssh.execute('mktemp -d')[0].rstrip('\n')
    ssh.chdir(temp)

    # Substitute export paths and caller-supplied arguments into the code.
    code, matches = math_package.replace_exports(temp)
    code = code.replace('{extra_args}', extra_args['extra_args'])
    code = code.replace('{data}', extra_args['data'])
    Log.debug('atmospherics.analysis.tasks.mathematica_package', code)

    # Shell-quote embedded single quotes so the echo '...' below survives.
    code = code.replace("'", '\'"\'"\'')
    command = "echo '{}' > {}/package.m".format(code, temp)
    ssh.execute(command)

    # Run headlessly; the trailing newlines push the job to the background.
    ret = ssh.execute('xvfb-run -s "-screen 0 640x480x24" math -script {}/package.m &\n\n\n\n'.format(temp))
    if ret:
        message = 'A message was returned by mathematica script {}.m:\n{}'.format(math_package.name, ret[-100:])
        Log.info('atmospherics.analysis.tasks.mathematica_package', message)

    #ssh.execute('rm {}'.format(os.path.join(temp, 'package.m')))

    # Local output directory named after the package plus a timestamp.
    target = os.path.join(settings.MEDIA_ROOT, user.username, 'output',
                          math_package.name.replace(' ', '_') + datetime.now().strftime('_%m%d%y_%H%M'))
    os.makedirs(target)
    ssh.get_d(temp, target)

    # NOTE(review): remote temp dir is never removed -- confirm intended.
    #ssh.execute('rm -rf {}'.format(temp))
    ssh.execute('disown')
    ssh.close()

    message = 'Mathematica package {} run.\nOutput saved to:\nhttp://atmospherics.lossofgenerality.com/{}'.format(math_package.name, os.path.join('output', user.username, os.path.split(target)[1]))
    Log.info('atmospherics.analysis.tasks.mathematica_package', message)

    # Email the user a completion notice when an address is on file.
    if user.email:
        subject = 'Atmospherics Mathematica {} Complete'.format(math_package.name)
        from_email = 'Atmospherics<*****@*****.**>'
        email = EmailMultiAlternatives(subject, message, from_email, [user.email])
        email.send()
def log_session_error(request, exception):
    """Record a session exception plus full request context to a Log row.

    Always returns None; callers use it purely for its side effect.
    """
    # --- General info
    log = Log(exception_type=type(exception).__name__,
              message=exception.detail,
              stack_trace=traceback.format_exc())

    log.request_url = request.get_full_path()
    log.request_method = request.method
    log.get_data = json.dumps(request.GET)
    log.post_data = json.dumps(request.POST)
    log.request_body = '{}'
    log.cookies = json.dumps(request.COOKIES)

    # --- Request meta info
    # Bug fix: meta was hand-assembled with '"%s": "%s"' formatting, which
    # produced invalid JSON whenever a value contained a quote or
    # backslash (the old .replace('\\', '|') hack also corrupted values).
    # Serialize it properly instead.
    log.meta = json.dumps({k: str(v) for k, v in request.META.items()})

    # --- User info
    if request.user.is_authenticated():
        log.user_id = request.user.id
        log.user_name = request.user.email

    # --- User agent info
    user_agent = request.user_agent
    # Browser
    log.request_browser = user_agent.browser
    # OS
    log.request_os = user_agent.os
    # Device
    log.request_device = user_agent.device
    # Device type
    log.is_mobile = user_agent.is_mobile
    log.is_tablet = user_agent.is_tablet
    log.is_touch_capable = user_agent.is_touch_capable
    log.is_pc = user_agent.is_pc
    log.is_bot = user_agent.is_bot

    # --- Save
    log.save()
    return None
def _cleanup_doc(doc):
    """Delete the original source document once it has been stored."""
    Log.debug("Deleting document {}".format(doc), Log.COMPONENT_CONSUMER)
    os.unlink(doc)
def log_cron(cron, action, data=''):
    """Persist a Log entry recording ``action`` for the given cron."""
    Log(cron=cron, action=action, data=data).save()
def _cleanup_tempdir(d):
    """Recursively delete the scratch directory used for page images."""
    Log.debug("Deleting directory {}".format(d), Log.COMPONENT_CONSUMER)
    shutil.rmtree(d)
def log_mop(mop, action, data=''):
    """Persist a Log entry for a mop action, attributed to its player's cron."""
    Log(cron=mop.player.cron, mop=mop, action=action, data=data).save()