def mmdc(input_text, file_format='svg', flags=None):
    """Render Mermaid source text by running the ``mmdc`` command.

    Parameters:
        input_text: The diagram source.  Non-string values are converted
            with ``str()``.
        file_format: Output format, used as the output file suffix.  Must
            be a non-empty string consisting only of lowercase letters.
        flags: Optional dictionary of extra command-line options.  Each
            key is passed as ``-<key>`` followed by its value.

    Returns:
        A committed ``DAFile`` into which the rendered output was copied.

    Raises:
        Exception: If the arguments are invalid, if the command exits with
            a non-zero status, or if the output file is empty.
    """
    if flags is None:
        flags = {}
    if not isinstance(flags, dict):
        raise Exception("mmdc: flags not a dictionary")
    if not isinstance(file_format, str) or re.search(r'[^a-z]', file_format) or len(file_format) == 0:
        raise Exception("mmdc: invalid file format")
    if not isinstance(input_text, str):
        input_text = str(input_text)
    sys.stderr.write("Writing:\n" + input_text + "\n")
    # Both temporary files are created with delete=False so that the external
    # command can open them by name; they are removed in the finally clause.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".mmd", delete=False) as input_file:
            temp_paths.append(input_file.name)
            input_file.write(input_text)
        with tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix="." + file_format, delete=False) as output_file:
            temp_paths.append(output_file.name)
        commands = [
            get_config('mmdc path', 'mmdc'),
            '-p', os.path.join(expanduser("~"), 'puppeteer-config.json'),
            '-i', input_file.name,
            '-o', output_file.name,
        ]
        for key, val in flags.items():
            commands.append('-' + str(key))
            # The argument list is executed without a shell, so the value is
            # passed verbatim; wrapping it in repr() would hand literal quote
            # characters to the command.
            commands.append(str(val))
        sys.stderr.write("Commands are: " + " ".join(commands) + "\n")
        try:
            output = subprocess.check_output(commands, stderr=subprocess.STDOUT).decode()
        except subprocess.CalledProcessError as err:
            output = err.output.decode()
            raise Exception("mmdc: there was an error. " + output)
        if os.path.getsize(output_file.name) == 0:
            raise Exception("mmdc: the command did not produce any output. " + output)
        obj = DAFile()
        obj.set_random_instance_name()
        obj.initialize(extension=file_format)
        obj.copy_into(output_file.name)
        obj.commit()
        return obj
    finally:
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # real result or error.
                pass
def mmdc(input_text, file_format='svg', flags=None):
    """Render Mermaid source text by running the ``mmdc`` command.

    This variant uses ``string_types`` / ``text_type`` for its string
    checks and conversion.

    Parameters:
        input_text: The diagram source.  Non-string values are converted
            with ``text_type()``.
        file_format: Output format, used as the output file suffix.  Must
            be a non-empty string consisting only of lowercase letters.
        flags: Optional dictionary of extra command-line options.  Each
            key is passed as ``-<key>`` followed by its value.

    Returns:
        A committed ``DAFile`` into which the rendered output was copied.

    Raises:
        Exception: If the arguments are invalid, if the command exits with
            a non-zero status, or if the output file is empty.
    """
    if flags is None:
        flags = {}
    if not isinstance(flags, dict):
        raise Exception("mmdc: flags not a dictionary")
    if not isinstance(file_format, string_types) or re.search(r'[^a-z]', file_format) or len(file_format) == 0:
        raise Exception("mmdc: invalid file format")
    if not isinstance(input_text, string_types):
        input_text = text_type(input_text)
    sys.stderr.write("Writing:\n" + input_text + "\n")
    input_path = None
    output_path = None
    try:
        # delete=False lets the external command open the files by name;
        # both are removed explicitly in the finally clause below.
        input_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".mmd", delete=False)
        input_path = input_file.name
        try:
            input_file.write(input_text)
        finally:
            input_file.close()
        output_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix="." + file_format, delete=False)
        output_path = output_file.name
        output_file.close()
        commands = [get_config('mmdc path', 'mmdc'), '-p', os.path.join(expanduser("~"), 'puppeteer-config.json'), '-i', input_path, '-o', output_path]
        for key, val in flags.items():
            commands.append('-' + str(key))
            # No shell is involved, so the value goes in unquoted; repr()
            # would pass literal quote characters to the command.
            commands.append(str(val))
        sys.stderr.write("Commands are: " + " ".join(commands) + "\n")
        try:
            output = subprocess.check_output(commands, stderr=subprocess.STDOUT).decode()
        except subprocess.CalledProcessError as err:
            output = err.output.decode()
            raise Exception("mmdc: there was an error. " + output)
        if os.path.getsize(output_path) == 0:
            raise Exception("mmdc: the command did not produce any output. " + output)
        obj = DAFile()
        obj.set_random_instance_name()
        obj.initialize(extension=file_format)
        obj.copy_into(output_path)
        obj.commit()
        return obj
    finally:
        for path in (input_path, output_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup only.
                pass
def fetch_revenue_info(self):
    """Run the ``revenue.js`` CasperJS script and keep its PDF output.

    The script receives the URL-quoted result of ``self.one_line()`` and
    the path of a new ``DAFile`` named ``Department-of-Revenue.pdf``.  A
    non-zero exit status is retried once after a five-second pause.  On
    success the file is stored in ``self.revenue_page``.

    Raises:
        Exception: If the script exits non-zero on both attempts.
    """
    pdf = DAFile()
    pdf.set_random_instance_name()
    pdf.initialize(filename='Department-of-Revenue.pdf')

    quoted_address = urllib.quote(self.one_line())
    script_path = DAStaticFile(filename='revenue.js').path()
    command = ['casperjs', script_path, quoted_address, pdf.path()]

    if subprocess.call(command) != 0:
        # One retry after a short pause before giving up.
        time.sleep(5)
        if subprocess.call(command) != 0:
            raise Exception("Failed to fetch Revenue information")

    pdf.retrieve()
    pdf.commit()
    self.revenue_page = pdf
def make_event(title=None, location=None, description=None, begin_date=None, begin_time=None, end_date=None, end_time=None, organizer=None, attendees=None):
    """Build a one-event calendar and write it to an ``event.ics`` DAFile.

    Parameters:
        title: Event name (required); stored as ``str(title)``.
        location: Optional location; ignored when ``None`` or ``''``.
        description: Optional description; ignored when ``None`` or ``''``.
        begin_date, begin_time: Required.  ``begin_date`` must provide
            ``replace_time()``, which is called with ``begin_time``.
        end_date, end_time: Required, combined the same way.
        organizer: Optional object with ``name.full()`` and ``email``.
        attendees: Optional iterable of objects with ``name.full()`` and
            ``email``.  ``None`` is treated as no attendees.

    Returns:
        A committed ``DAFile`` (filename ``event.ics``, MIME type
        ``text/calendar``) containing the serialized calendar.

    Raises:
        Exception: If any required parameter is ``None``.
    """
    required = (
        (title, "make_event: a title parameter is required"),
        (begin_date, "make_event: a begin_date parameter is required"),
        (begin_time, "make_event: a begin_time parameter is required"),
        (end_date, "make_event: an end_date parameter is required"),
        (end_time, "make_event: an end_time parameter is required"),
    )
    for value, message in required:
        if value is None:
            raise Exception(message)
    if attendees is None:
        attendees = []
    # 'HH' is the 24-hour field.  The previous 'hh' is a 12-hour field and,
    # with no AM/PM marker in the pattern, would write afternoon times as
    # morning times.  NOTE(review): assumes format_datetime follows
    # Unicode LDML / Babel patterns, as 'yyyy-MM-dd' suggests -- confirm.
    timestamp_pattern = 'yyyy-MM-dd HH:mm:ss'
    c = Calendar()
    e = Event()
    if organizer is not None:
        e.organizer = Organizer(common_name=organizer.name.full(), email=organizer.email)
    if len(attendees) > 0:
        e.attendees = [
            Attendee(common_name=attendee.name.full(), email=attendee.email)
            for attendee in attendees
        ]
    e.name = str(title)
    e.begin = as_datetime(begin_date.replace_time(begin_time), timezone='UTC').format_datetime(timestamp_pattern)
    e.end = as_datetime(end_date.replace_time(end_time), timezone='UTC').format_datetime(timestamp_pattern)
    if location not in (None, ''):
        e.location = str(location)
    if description not in (None, ''):
        e.description = str(description)
    c.events.add(e)
    ics_file = DAFile('ics_file')
    ics_file.set_random_instance_name()
    ics_file.initialize(filename="event.ics", mimetype="text/calendar")
    with open(ics_file.path(), 'w') as f:
        f.write(str(c))
    ics_file.commit()
    return ics_file
def download_file(filename, folder_name):
    """Download a file, looked up by name and folder, into a new DAFile.

    The file id comes from ``get_file_id(filename, folder_name)``; the
    content is fetched chunk by chunk through the service returned by
    ``api.drive_service()`` and written to the new file's path.

    Returns:
        The committed ``DAFile``.

    Raises:
        Exception: If ``get_file_id`` returns ``None``.
    """
    remote_id = get_file_id(filename, folder_name)
    if remote_id is None:
        raise Exception("The file was not found")

    target = DAFile()
    target.set_random_instance_name()
    target.initialize(filename=filename)

    drive = api.drive_service()
    with open(target.path(), 'wb') as handle:
        request = drive.files().get_media(fileId=remote_id)
        fetcher = apiclient.http.MediaIoBaseDownload(handle, request)
        finished = False
        while finished is False:
            # next_chunk() yields (progress, finished); progress is unused.
            _progress, finished = fetcher.next_chunk()

    target.commit()
    return target
def vyhlaskaText(id):
    """Fetch ``https://sbirkapp.gov.cz/detail/<id>/text`` and return its text.

    The handling depends on the response's ``content-type`` header:

    * contains ``pdf``  -- the body is written to a ``DAFile`` and the
      result of ``ocr_file(..., language="cs")`` is returned;
    * contains ``docx`` -- paragraph texts are joined with newlines;
    * equals ``doc`` or ``application/msword`` -- the document URL is
      posted to an external webhook and its UTF-8 response is returned;
    * contains ``odt``  -- the body is written to a temporary file and
      processed by ``textract``;
    * anything else     -- the content-type string itself is returned.

    Parameters:
        id: Identifier string inserted into the URL.  (The name shadows
            the builtin but is kept for keyword-argument compatibility.)

    Returns:
        The extracted text; or, in the ODT branch, the caught exception
        object if extraction fails; or the content-type string for
        unrecognised types.
    """
    url = 'https://sbirkapp.gov.cz/detail/' + id + '/text'
    r = requests.get(url)
    content_type = r.headers['content-type']

    if "pdf" in content_type:
        thefile = DAFile("thefile")
        thefile.initialize(filename="vyhlaska.pdf", extension="pdf")
        thefile.write(r.content, binary="true")
        return ocr_file(thefile, language="cs")

    if "docx" in content_type:
        doc = docx.Document(io.BytesIO(r.content))
        return '\n'.join(para.text for para in doc.paragraphs)

    if content_type in ("doc", "application/msword"):
        r = requests.post('https://hook.integromat.com/7nmnpym8m6g0byshhd2d7e6jh6y8gcjn?url=' + url)
        return r.content.decode('utf-8')

    if "odt" in content_type:
        # Failures here are deliberately reported by returning the exception
        # object rather than raising; callers receive a non-string value.
        try:
            with tempfile.NamedTemporaryFile() as tmp:
                # The body already downloaded above is reused instead of
                # requesting the same URL a second time.
                tmp.write(r.content)
                # Flush so the bytes are on disk before textract opens the
                # file by name; otherwise it may see an empty or partial file.
                tmp.flush()
                return textract.process(tmp.name, extension='odt').decode('utf-8')
        except Exception as e:
            return e

    return content_type
def as_dafile(self, file_obj=None):
    """Load this object's URL into a DAFile.

    The target is initialized with ``self.filename``, filled from
    ``self.url`` and committed.

    Parameters:
        file_obj: An existing DAFile to fill in place, or ``None`` to have
            a new one created.

    Returns:
        The newly created DAFile when ``file_obj`` was ``None``; otherwise
        ``None`` (the supplied object is modified in place).
    """
    create_new = file_obj is None
    target = DAFile() if create_new else file_obj
    target.initialize(filename=self.filename)
    target.from_url(self.url)
    target.commit()
    # Only a freshly created file is handed back to the caller.
    return target if create_new else None
def as_dafile(self, file_obj=None, o365=None):
    """Load this object's URL into a DAFile, setting its title as well.

    Parameters:
        file_obj: An existing DAFile to fill in place, or ``None`` to have
            a new one created.
        o365: Optional object; when truthy, its
            ``refresh_download_link(self)`` is called before downloading.

    Returns:
        The newly created DAFile when ``file_obj`` was ``None``; otherwise
        ``None`` (the supplied object is modified in place).
    """
    if o365:
        o365.refresh_download_link(self)

    if file_obj is not None:
        # Existing file: initialize first, then set the title.
        file_obj.initialize(filename=self.filename)
        file_obj.title = self.primaryTitle()
        file_obj.from_url(self.url)
        file_obj.commit()
        return None

    # New file: the title is assigned before initialize().
    created = DAFile()
    created.title = self.primaryTitle()
    created.initialize(filename=self.filename)
    created.from_url(self.url)
    created.commit()
    return created
def fetch_google_street_view_image(self):
    """Download a Street View image for this address as PNG and PDF.

    The API key is read from the ``'api key'`` entry of the ``'google'``
    configuration.  The image is requested at 640x640 for the URL-quoted
    ``self.one_line(with_city_state=True)`` and saved to a temporary PNG,
    which is then converted into one DAFile (PDF) and copied into another
    (PNG).  The results are stored in ``self.google_street_view_pdf`` and
    ``self.google_street_view_image``.

    Raises:
        Exception: If no API key is configured or the download fails.
    """
    api_key = get_config('google', dict()).get('api key', None)
    if api_key is None:
        raise Exception("No Google Maps API key")

    png_temp = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".png")
    quoted_location = urllib.quote(self.one_line(with_city_state=True))
    endpoint = 'https://maps.googleapis.com/maps/api/streetview?size=640x640&location='
    request_url = endpoint + quoted_location + '&key=' + api_key
    try:
        urllib.urlretrieve(request_url, png_temp.name)
    except Exception:
        raise Exception('Error retrieving Google Street View image')

    def _new_file(name):
        # Create and initialize a DAFile with a random instance name.
        da_file = DAFile()
        da_file.set_random_instance_name()
        da_file.initialize(filename=name)
        return da_file

    pdf_version = _new_file("Google_Street_View.pdf")
    png_to_pdf(png_temp.name, pdf_version.path())
    pdf_version.retrieve()
    pdf_version.commit()

    png_version = _new_file("Google_Street_View.png")
    png_version.copy_into(png_temp.name)
    png_version.retrieve()
    png_version.commit()

    self.google_street_view_pdf = pdf_version
    self.google_street_view_image = png_version
def get_pdf(self, indexno=None, fileno=None, pageno=None, filename=None):
    """Download one docket document page into a new DAFile.

    The request goes to ``https://docket.philalegal.org/docketinfo`` with
    ``self.docketnum``, the given index/file/page numbers and the
    ``'docket api key'`` configuration value as query parameters.

    Parameters:
        indexno, fileno, pageno: Required; converted with ``str()`` and
            placed in the query string.
        filename: Required; filename given to the new DAFile.

    Returns:
        The committed ``DAFile``.

    Raises:
        Exception: If any parameter is ``None``, or if the API key lookup
            returns ``None``.
    """
    # Validate the arguments before touching configuration so that a bad
    # call fails fast with the intended message.
    if indexno is None or fileno is None or pageno is None or filename is None:
        raise Exception("get_pdf: invalid input")
    api_key = get_config('docket api key')
    if api_key is None:
        # Previously a missing key surfaced as a TypeError from the string
        # concatenation below.
        raise Exception("get_pdf: no docket api key is configured")
    # NOTE(review): the values are inserted without URL-encoding; this
    # presumably relies on them being plain numbers -- confirm.
    url = (
        "https://docket.philalegal.org/docketinfo?docketnum=" + str(self.docketnum)
        + '&indexno=' + str(indexno)
        + '&fileno=' + str(fileno)
        + '&pageno=' + str(pageno)
        + "&key=" + str(api_key)
    )
    new_file = DAFile()
    new_file.set_random_instance_name()
    new_file.initialize(filename=filename)
    new_file.from_url(url)
    new_file.retrieve()
    new_file.commit()
    return new_file
def fetch_opa_info(self):
    """Run the OPA CasperJS scripts and store their results on ``self``.

    ``opa-png.js`` is run with the URL-quoted ``self.one_line()`` and three
    temporary file names (two PNGs and a text file).  Each script gets one
    retry after five seconds.  If the first script still fails and
    ``self.address`` has an ``'opa'`` entry, ``opa-account-png.js`` is
    tried with that value instead.

    On success the two PNGs are converted to the DAFiles ``OPA.pdf`` and
    ``AVI.pdf`` (stored as ``self.opa_page`` / ``self.avi_page``), and
    ``self.has_homestead`` is set to ``False`` only when the text file
    contains exactly ``"No"``.

    Raises:
        Exception: If no script run ends with a zero exit status.
    """
    def _call_with_retry(command):
        # Run the command; on a non-zero exit wait five seconds and run it
        # once more.  Returns the last exit status.
        status = subprocess.call(command)
        if status != 0:
            time.sleep(5)
            status = subprocess.call(command)
        return status

    homestead_txt = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".txt")
    opa_png = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".png")
    avi_png = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".png")

    opa_pdf = DAFile()
    opa_pdf.set_random_instance_name()
    opa_pdf.initialize(filename='OPA.pdf')
    avi_pdf = DAFile()
    avi_pdf.set_random_instance_name()
    avi_pdf.initialize(filename='AVI.pdf')

    quoted_address = urllib.quote(self.one_line())
    output_names = [opa_png.name, avi_png.name, homestead_txt.name]

    by_address = ['casperjs', DAStaticFile(filename='opa-png.js').path(), quoted_address] + output_names
    sys.stderr.write(" ".join(by_address) + "\n")
    exit_status = _call_with_retry(by_address)

    if exit_status != 0 and self.address.get('opa', None) is not None:
        # Fall back to a lookup by OPA account value.
        by_account = ['casperjs', DAStaticFile(filename='opa-account-png.js').path(), self.address['opa']] + output_names
        exit_status = _call_with_retry(by_account)

    if exit_status != 0:
        raise Exception("Failed to fetch OPA information")

    png_to_pdf(opa_png.name, opa_pdf.path())
    opa_pdf.retrieve()
    opa_pdf.commit()
    png_to_pdf(avi_png.name, avi_pdf.path())
    avi_pdf.retrieve()
    avi_pdf.commit()

    self.opa_page = opa_pdf
    self.avi_page = avi_pdf

    # Rewind before reading what the script wrote to the text file.
    homestead_txt.seek(0, 0)
    self.has_homestead = homestead_txt.read() != "No"
def fetch_philadox_info(self):
    """Run the ``eagleweb.js`` CasperJS script and collect the PDFs it saves.

    A ``using_philadox`` key in Redis (expiring after 120 seconds) is used
    as a busy marker: the method polls every five seconds until the key is
    absent, sets it, runs the script, and deletes it again.  The script
    receives a URL-quoted JSON list with the address number, direction and
    street, a temporary directory, and the configured Philadox username
    and password.  Every ``.pdf`` file found in that directory afterwards
    is copied, in sorted order, into a new DAFile; the list is stored in
    ``self.philadox_files``.

    Raises:
        Exception: If the script exits with a non-zero status.
    """
    import shutil  # local import: only needed for temp-directory cleanup

    r = DARedis()
    # NOTE(review): the check below and the set that follows are separate
    # operations, so two callers could both pass the check -- confirm
    # whether an atomic set-if-absent is available on DARedis.
    while r.get('using_philadox') is not None:
        time.sleep(5)
    pipe = r.pipeline()
    pipe.set('using_philadox', 1)
    pipe.expire('using_philadox', 120)
    pipe.execute()

    tdir = None
    try:
        try:
            tdir = tempfile.mkdtemp()
            info = urllib.quote(json.dumps([
                self.address['number'],
                self.address['direction'],
                self.address['street'],
                tdir,
                get_config('philadox username'),
                get_config('philadox password'),
            ]))
            step = ['casperjs', DAStaticFile(filename='eagleweb.js').path(), info]
            result = subprocess.call(step)
        finally:
            # Release the marker even when launching the script raised, so
            # other callers are not blocked until the key expires.
            r.delete('using_philadox')
        if result != 0:
            raise Exception("Failed to fetch Philadox information")
        outfiles = []
        for pdf_file in sorted(f for f in os.listdir(tdir) if f.endswith('.pdf')):
            new_file = DAFile()
            new_file.set_random_instance_name()
            new_file.initialize(filename=pdf_file)
            new_file.copy_into(os.path.join(tdir, pdf_file))
            new_file.retrieve()
            new_file.commit()
            outfiles.append(new_file)
    finally:
        # The PDFs have been copied into DAFiles (or the run failed), so the
        # scratch directory is no longer needed.
        if tdir is not None:
            shutil.rmtree(tdir, ignore_errors=True)
    self.philadox_files = outfiles