def mimetype_from_path(self, path):
    """Return the MIME type of the file at *path*, detected via libmagic."""
    detector = magic.Magic(mime=True)
    return detector.from_file(path)
from restfulgit.utils.json_err_pages import json_error_page, register_general_error_handler from restfulgit.utils.url_converters import SHAConverter, register_converter from restfulgit.utils import mime_types # Optionally use better libmagic-based MIME-type guessing try: import magic as libmagic except ImportError: import mimetypes def guess_mime_type(filename, content): # pylint: disable=W0613 (mime_type, encoding) = mimetypes.guess_type(filename) # pylint: disable=W0612 return mime_type else: import atexit MAGIC = libmagic.Magic(flags=libmagic.MAGIC_MIME_TYPE) atexit.register(MAGIC.close) def guess_mime_type(filename, content): # pylint: disable=W0613 return MAGIC.id_buffer(content) porcelain = Blueprint('porcelain', __name__) # pylint: disable=C0103 register_converter(porcelain, 'sha', SHAConverter) register_general_error_handler(porcelain, json_error_page) @porcelain.route('/repos/') @corsify @jsonify def get_repo_list():
def test_getparam(self):
    """Round-trip a libmagic parameter: set MAGIC_PARAM_INDIR_MAX, read it back."""
    detector = magic.Magic(mime=True)
    detector.setparam(magic.MAGIC_PARAM_INDIR_MAX, 1)
    value = detector.getparam(magic.MAGIC_PARAM_INDIR_MAX)
    self.assertEqual(value, 1)
def allowed_file(filename):
    """Return True when *filename*'s libmagic MIME type is in ALLOWED_MIME."""
    detector = magic.Magic(mime=True)
    return detector.from_file(filename) in ALLOWED_MIME
def _magic():
    """Lazily import python-magic and return a MIME-type detector instance."""
    import magic  # type: ignore

    detector = magic.Magic(mime=True)
    return detector
from datetime import datetime
from typing import Any

import magic  # type: ignore
import requests
from requests import Response

# Module-level libmagic MIME detector, reused by upload_artifact().
mime: Any = magic.Magic(mime=True)
# Base URL of the service that hands out signed upload URLs.
file_access_url = "https://file-access-cvee2224cq-ew.a.run.app"


def log(message: str):
    """Print *message* prefixed with the current HH:MM:SS timestamp."""
    print(f'{datetime.now().strftime("%H:%M:%S")}: {message}')


def upload_artifact(upload_path: str, filename: str):
    """Upload the local file *filename* to *upload_path* via a signed URL.

    Flow: sniff the file's MIME type with libmagic, ask the file-access
    service for a signed upload URL for that content type, then PUT the
    raw file bytes to the signed URL.
    """
    log(f"Saving (unknown) to {upload_path}")
    content_type: str = mime.from_file(filename)
    with open(filename, "rb") as data:
        resp: Response = requests.get(
            f"{file_access_url}/get_upload_url/{upload_path}",
            params={"content_type": content_type},
        )
        signed_url: str = resp.text
        # NOTE(review): neither request's status code is checked; a failed
        # get_upload_url call would PUT to an error-page body — verify upstream.
        requests.put(signed_url, data, headers={"Content-Type": content_type})
async def _(event):
    """Telegram handler: upload a replied-to media file (or a local path given
    as an argument) to VeryStream and report the streaming URL.

    Flow: download/locate the file, hash it, create a remote folder (STEP
    INIT), request an upload URL (STEP ONE), POST the file (STEP TWO), then
    clean up. Progress and errors are reported by editing the status message.
    """
    # Ignore forwarded messages.
    if event.fwd_from:
        return
    mone = await edit_or_reply(event, "Processing ...")
    # Both VeryStream credentials are required.
    if Config.VERY_STREAM_LOGIN is None or Config.VERY_STREAM_KEY is None:
        await mone.edit(
            "This module requires API key from https://verystream.com. Aborting!"
        )
        return False
    input_str = event.pattern_match.group(1)
    if not os.path.isdir(Config.TMP_DOWNLOAD_DIRECTORY):
        os.makedirs(Config.TMP_DOWNLOAD_DIRECTORY)
    required_file_name = None
    start = datetime.now()
    if event.reply_to_msg_id and not input_str:
        # No explicit path given: download the replied-to message's media.
        reply_message = await event.get_reply_message()
        try:
            c_time = time.time()
            downloaded_file_name = await borg.download_media(
                reply_message,
                Config.TMP_DOWNLOAD_DIRECTORY,
                progress_callback=lambda d, t: asyncio.get_event_loop().create_task(
                    progress(d, t, mone, c_time, "trying to download")
                ),
            )
        except Exception as e:  # pylint:disable=C0103,W0703
            await mone.edit(str(e))
            return False
        else:
            end = datetime.now()
            ms = (end - start).seconds
            required_file_name = downloaded_file_name
            await mone.edit(
                "Downloaded to `{}` in {} seconds.".format(downloaded_file_name, ms)
            )
    elif input_str:
        # Explicit local path supplied in the command.
        input_str = input_str.strip()
        if os.path.exists(input_str):
            end = datetime.now()
            ms = (end - start).seconds
            required_file_name = input_str
            await mone.edit("Found `{}` in {} seconds.".format(input_str, ms))
        else:
            await mone.edit("File Not found in local server. Give me a file path :((")
            return False
    # logger.info(required_file_name)
    if required_file_name:
        # required_file_name will have the full path
        file_name = os.path.basename(required_file_name)
        # Strip the extension, then append a timestamp to make the name unique.
        if "." in file_name:
            file_name = file_name.rsplit(".", maxsplit=1)[0]
        file_name = file_name + str(time.time())
        # NOTE(review): st_size result is discarded — this line has no effect.
        os.stat(required_file_name).st_size
        # https://stackoverflow.com/a/22058673/4723940
        sha_one_file_hash = get_sha_one_hash(required_file_name, 65536)
        # /* STEP 1: get upload_key */
        login = Config.VERY_STREAM_LOGIN
        key = Config.VERY_STREAM_KEY
        sha1 = sha_one_file_hash
        # NOTE(review): this Magic instance is never used below — dead code?
        mime = magic.Magic(mime=True)
        step_zero_url = f"https://api.verystream.com/file/createfolder?login={login}&key={key}&name={file_name}"
        async with aiohttp.ClientSession() as session:
            # STEP INIT: create a remote folder named after the file.
            resp_zero = await session.get(step_zero_url)
            step_zero_response_text = json.loads(await resp_zero.text())
            # logger.info(step_zero_response_text)
            if step_zero_response_text["status"] == 200:
                folder_id_e = step_zero_response_text["result"]["folderid"]
                await mone.edit(f"Created Folder with ID: {folder_id_e}")
                # STEP ONE: request an upload URL bound to the file's sha1.
                step_one_url = f"https://api.verystream.com/file/ul?login={login}&key={key}&sha1={sha1}&folder={folder_id_e}"
                resp = await session.get(step_one_url)
                # logger.info(resp.status)
                step_one_response_text = json.loads(await resp.text())
                # logger.info(step_one_response_text)
                if step_one_response_text["status"] == 200:
                    url = step_one_response_text["result"]["url"]
                    await mone.edit(f"Start Uploading to {url}")
                    start = datetime.now()
                    # STEP TWO: upload the file with a blocking requests.post
                    # (NOTE(review): blocks the event loop for large files).
                    files = {"file1": (file_name, open(required_file_name, "rb"))}
                    resp = requests.post(url, files=files)
                    step_two_response_text = resp.json()
                    # logger.info(step_two_response_text)
                    if step_two_response_text["status"] == 200:
                        output_str = json.dumps(
                            step_two_response_text["result"], sort_keys=True, indent=4
                        )
                        stream_url = step_two_response_text["result"]["url"]
                        end = datetime.now()
                        ms = (end - start).seconds
                        await mone.edit(
                            f"Obtained {stream_url} in {ms} seconds.\n{output_str}"
                        )
                        # cleanup
                        await event.delete()
                        try:
                            os.remove(required_file_name)
                        except:  # NOTE(review): bare except silently swallows errors
                            pass
                    else:
                        await mone.edit(
                            f"VeryStream returned {step_two_response_text['status']} => {step_two_response_text['msg']}, after STEP ONE"
                        )
                else:
                    await mone.edit(
                        f"VeryStream returned {step_one_response_text['status']} => {step_one_response_text['msg']}, after STEP ONE"
                    )
            else:
                await mone.edit(
                    f"VeryStream returned {step_zero_response_text['status']} => {step_zero_response_text['msg']}, after STEP INIT"
                )
    else:
        await mone.edit("File Not found in local server. Give me a file path :((")
def get_content_type(self):
    """Rewind the stored file and sniff its MIME type from its contents."""
    self.file.seek(0)
    sniffer = magic.Magic(mime=True)
    return sniffer.from_buffer(self.file.read())
def upload():
    """ --> passcode, private, image, url <-- redirect uri

    Handle an upload form submission: accept either a direct file upload or
    a URL to fetch, determine a file extension (filename -> headers ->
    libmagic sniffing), enforce the size limit, save the file, and announce
    public uploads on a Redis channel. Redirects to the uploaded file on
    success, back to the index on failure.
    """
    form = UploadForm()
    # Verify password
    if form.validate_on_submit(
    ) and current_app.config["UPLOAD_PASSWORD"] == form.password.data:
        redis_config = current_app.config["REDIS_CONFIG"]
        if redis_config:
            redis_client = redis.Redis(**redis_config, decode_responses=True)
        else:
            # NOTE(review): redis_client is None here, but the publish
            # branches below dereference redis_client.connection — crashes
            # for public uploads when Redis is unconfigured. Verify.
            redis_client = None
        max_size = current_app.config["MAX_UPLOAD_SIZE_BYTES"]
        if form.file.data:
            # TODO: Consolidate code since a lot of this is repeated
            # Try and get the extension from the filename. If that fails use magic to figure it out.
            file_ext = os.path.splitext(form.file.data.filename)[1]
            content_type = form.file.data.content_type
            print(f"File extension: {file_ext}")
            # Extension couldn't be determined from the filename, attempt using the headers.
            if not file_ext and content_type:
                file_ext = mimetypes.guess_extension(content_type, strict=True)
                print(f"Guessing file extension from headers: {file_ext}")
            # Extension couldn't be determined from the filename or the headers, try reading the data.
            if not file_ext:
                magic_instance = magic.Magic(mime=True)
                mime_type = magic_instance.from_buffer(
                    form.file.data.stream.read())
                file_ext = mimetypes.guess_extension(mime_type, strict=True)
                print(f"Guessing file extension from buffer: {file_ext}")
            if not file_ext:
                # TODO: Flash error
                print(f"Invalid file extension: {file_ext}")
                return redirect(url_for('home.index'))
            file_name = get_filename(file_ext, form.private.data)
            file_path = os.path.join(current_app.config["UPLOAD_PATH"],
                                     file_name)
            form.file.data.save(file_path)
            file_size = os.path.getsize(file_path)
            if file_size > max_size or file_size <= 0:
                print("Upload size invalid")
                if os.path.exists(file_path):
                    os.remove(file_path)
                # NOTE(review): execution falls through after removing the
                # file, so the upload is still announced/redirected below.
            if not form.private.data and redis_client.connection:
                redis_client.publish(
                    current_app.config["REDIS_CHANNEL"],
                    url_for('home.uploaded_file',
                            filename=file_name,
                            _external=True))
            return redirect(url_for('home.uploaded_file', filename=file_name))
        elif form.url.data:
            print("URL: " + form.url.data)
            # download file if file is URL
            download_request = requests.get(form.url.data, stream=True)
            if not download_request.ok:
                # TODO: Flash error
                print("Download request failed.")
                return redirect(url_for('home.index'))
            content_len = int(download_request.headers["Content-Length"] or 0)
            # Do first size check before downloading
            if content_len > max_size or content_len <= 0:
                # TODO: Flash error
                print("Upload size invalid")
                return redirect(url_for('home.index'))
            print(f"Content-Length: {content_len}")
            content_type = download_request.headers["Content-Type"]
            file_ext = mimetypes.guess_extension(content_type, strict=True)
            if not file_ext:
                # TODO: Flash error
                print("Unknown file extension")
                return redirect(url_for('home.index'))
            file_name = get_filename(file_ext, form.private.data)
            file_path = os.path.join(current_app.config["UPLOAD_PATH"],
                                     file_name)
            downloaded_size = 0
            try:
                with open(file_path, "wb") as out_file:
                    for block in download_request.iter_content(
                            1024, decode_unicode=True):
                        if not block:
                            break
                        out_file.write(block)
                        downloaded_size += len(block)
                        # Second size check occurs while downloading.
                        if downloaded_size > max_size:
                            raise UploadSizeException
            except UploadSizeException:
                print("Upload size exceeded")
                if os.path.exists(file_path):
                    os.remove(file_path)
            if not form.private.data and redis_client.connection:
                redis_client.publish(
                    current_app.config["REDIS_CHANNEL"],
                    url_for('home.uploaded_file',
                            filename=file_name,
                            _external=True))
            return redirect(url_for('home.uploaded_file', filename=file_name))
    return redirect(url_for('home.index'))
import boto3 from botocore.client import Config from botocore.exceptions import ClientError from django.conf import settings from django.contrib.staticfiles.storage import ManifestFilesMixin from django.core.exceptions import ImproperlyConfigured from django.core.files.base import File from django.core.files.storage import Storage from django.core.signals import setting_changed from django.utils.deconstruct import deconstructible from django.utils.encoding import filepath_to_uri, force_bytes, force_str, force_text from django.utils.timezone import make_naive, utc import magic mime_detector = magic.Magic(mime=True) READ_MAGIC_BYTES = 1024 # Most of the file types can be identified from the header - don't read the whole file. def _wrap_errors(func): @wraps(func) def _do_wrap_errors(self, name, *args, **kwargs): try: return func(self, name, *args, **kwargs) except ClientError as ex: code = ex.response.get("Error", {}).get("Code", "Unknown") err_cls = OSError if code == "NoSuchKey": err_cls = FileNotFoundError raise err_cls("S3Storage error at {!r}: {}".format( name, force_text(ex)))
def get_mimetype(malware_path):
    """Finds the standard mimetype for file and returns type name."""
    return magic.Magic(mime=True).from_file(malware_path)
def allowed_file(filename):
    """Return True when the file is a JPEG, PNG or GIF according to libmagic."""
    permitted = ("image/jpeg", "image/png", "image/gif")
    detected = magic.Magic(mime=True).from_file(filename)
    return detected in permitted
class Parser(metaclass=abc.ABCMeta):
    """Generic parser iterator. Base parser class.

    Accepts either an open stream or a path; paths are opened transparently,
    decompressing gzip/bzip2 files detected by extension or by libmagic.
    """

    # Shared libmagic MIME sniffer. With mime=True, python-magic's
    # from_file() returns a *str* such as "application/gzip".
    wizard = magic.Magic(mime=True)

    def __init__(self, handle):
        """Wrap *handle* (an open stream, or a path to open in text mode).

        :param handle: an io stream, or a filename. Compressed files are
            recognised by their extension or their magic signature.
        :raises FileNotFoundError: if *handle* is a path that does not exist.
        """
        self.__closed = False
        if not isinstance(handle, io.IOBase):
            # BUG FIX: Magic(mime=True).from_file() returns str, so the
            # previous comparisons against b"application/gzip" and
            # b"application/x-bzip2" were always False — compressed files
            # lacking the matching extension were opened as plain text.
            if handle.endswith(".gz") or self.wizard.from_file(handle) == "application/gzip":
                opener = gzip.open
            elif handle.endswith(".bz2") or self.wizard.from_file(handle) == "application/x-bzip2":
                opener = bz2.open
            else:
                # Line-buffered plain-text open.
                opener = partial(open, **{"buffering": 1})
            try:
                handle = opener(handle, "rt")
            except FileNotFoundError:
                raise FileNotFoundError("File not found: {0}".format(handle))
        self._handle = handle
        self.closed = False

    def __iter__(self):
        return self

    def __next__(self):
        # Note: yields the raw line, including the trailing newline;
        # does not raise StopIteration at EOF (returns "" instead).
        line = self._handle.readline()
        return line

    def __enter__(self):
        if self.closed is True:
            raise ValueError('I/O operation on closed file.')
        return self

    def __exit__(self, *args):
        _ = args
        self._handle.close()
        self.closed = True

    def close(self):
        """ Alias for __exit__ """
        self.__exit__()

    @property
    def name(self):
        """ Return the filename. """
        return self._handle.name

    @property
    def closed(self):
        """ Boolean flag. If True, the file has been closed already. """
        return self.__closed

    @closed.setter
    def closed(self, *args):
        """
        :param args: boolean flag

        This sets the closed flag of the file.
        """
        if not isinstance(args[0], bool):
            raise TypeError("Invalid value: {0}".format(args[0]))
        self.__closed = args[0]
def bulk_imsis():
    """Bulk-load MSISDN/IMSI pairs for an operator from an uploaded CSV.

    Validates the operator name, the uploaded file's MIME type, the CSV
    headers and each row (IMSI/MSISDN length and format), updates the
    `pairing` table from the clean rows, and writes rejected rows to an
    error CSV. Returns a (payload, status) tuple in Flask style.
    """
    try:
        rtn_msg = ""
        chk_mno = False
        mno = request.form.get("mno")
        for key, val in conf.items():  # checking for correct operator's name
            if mno == val:
                chk_mno = True
        if not chk_mno:
            data = {
                "Error": "improper Operator-name provided"
            }
            return data, 422
        else:
            file = request.files.get('file')
            if file and file_allowed(file.filename):
                filename = secure_filename(file.filename)
                file_path = os.path.join(UPLOAD_FOLDER, filename)
                file.save(os.path.join(UPLOAD_FOLDER, filename))
                # Reject anything libmagic does not see as plain text.
                f = magic.Magic(mime=True)
                file_type = f.from_file(file_path)
                if file_type != 'text/plain':
                    data = {
                        "Error": "File type is not valid"
                    }
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    return data, 422
                # ---------------------------------------------------------------------------------------------------
                # pat1: Pakistani MSISDN (923 + 9 digits); pat2: 15-digit IMSI.
                pat1 = re.compile(r'923\d{9}')
                pat2 = re.compile(r'\d{15}')
                try:
                    newfile = open(file_path, 'r')
                    df = pd.read_csv(newfile, usecols=range(2),
                                     dtype={"MSISDN": str, "IMSI": str})
                    newfile.close()
                except Exception as e:
                    if e:
                        newfile.close()
                        data = {
                            "Error": "File content is not Correct"
                        }
                        if os.path.isfile(file_path):
                            os.remove(file_path)
                        return data, 422
                total_rows, total_columns = df.shape
                if df.columns[0] != 'MSISDN' or df.columns[1] != 'IMSI':
                    data = {
                        "Error": "File headers are missing or incorrect"
                    }
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    return data, 422
                df_dup = df[df.duplicated(['IMSI'], keep='first')]  # To detect duplicated IMSIs in uploaded File
                if not df_dup.empty:
                    data = {
                        "Error": "File contains duplicated IMSIs"
                    }
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    return data, 422
                else:
                    # Split invalid rows into df2..df6 and keep the clean
                    # remainder in df1 after each filter pass.
                    df2 = df[df.isnull().any(axis=1)]          # rows with missing values
                    df1 = df.dropna()
                    df3 = df1[(df1.IMSI.astype(str).str.len() > 15)]     # over-long IMSI
                    df1 = df1[~(df1.IMSI.astype(str).str.len() > 15)]
                    df4 = df1[(df1.MSISDN.astype(str).str.len() > 12)]   # over-long MSISDN
                    df1 = df1[~(df1.MSISDN.astype(str).str.len() > 12)]
                    df5 = df1[~(df1.IMSI.astype(str).str.match(pat2))]   # malformed IMSI
                    df1 = df1[(df1.IMSI.astype(str).str.match(pat2))]
                    df6 = df1[~(df1.MSISDN.astype(str).str.match(pat1))]  # malformed MSISDN
                    df1 = df1[(df1.MSISDN.astype(str).str.match(pat1))]
                    final_rows, final_columns = df1.shape
                    del_rec = (total_rows - final_rows)
                    # Overwrite the upload with only the clean rows for COPY.
                    df1.to_csv(file_path, index=False)
                    lst_df = [df2, df3, df4, df5, df6]
                    dfs = pd.concat(lst_df, ignore_index=False)
                    # ---------------------------------------------------------------------------------------------------
                    con = connect()
                    cur = con.cursor()
                    filename1 = os.path.join(UPLOAD_FOLDER, filename)
                    cur.execute(""" CREATE TABLE if not exists test_mno (t_msisdn text, t_imsi text); """)
                    f = open(filename1)
                    cur.copy_from(f, 'test_mno', sep=",")
                    # NOTE(review): mno is interpolated with str.format — it is
                    # whitelist-checked against conf above, but parameterized
                    # queries would be safer. Verify.
                    cur.execute(""" select msisdn, imsi, t_msisdn, t_imsi, change_type, export_status, old_imsi from pairing inner join test_mno on (pairing.msisdn = test_mno.t_msisdn) and (pairing.end_date is null) and (pairing.add_pair_status = true) and (pairing.operator_name = '{}'); """.format(mno))
                    cur.execute(""" update pairing set imsi = test_mno.t_imsi, change_type = 'ADD', export_status = false, updated_at = date_trunc('second', NOW()) from test_mno where pairing.msisdn = test_mno.t_msisdn and pairing.end_date is null and pairing.add_pair_status = true and pairing.operator_name = '{}'; """.format(mno))
                    cur.execute(""" drop table if exists test_mno; """)
                    con.commit()
                    con.close()
                    f.close()
                    if del_rec:
                        # Some rows were rejected: write them to an error CSV.
                        error_file = "Error-Records_" + mno + '_' + strftime("%Y-%m-%d_%H-%M-%S") + '.csv'
                        download_path = os.path.join(DOWNLOAD_FOLDER, error_file)
                        file.save(download_path)
                        dfs.to_csv(download_path, index=False)
                    else:
                        download_path = "No error file available"
                    rtn_msg = {
                        "msg": "File successfully loaded",
                        "Total_Records": total_rows,
                        "Successful_Records": final_rows,
                        "Deleted_Record": del_rec,
                        "link": download_path
                    }
                    return rtn_msg, 200
            else:
                rtn_msg = {
                    "Error": "No file or improper file found"
                }
                return rtn_msg, 422
    except Exception as e:
        # NOTE(review): the exception is swallowed and the function returns
        # None, which Flask will reject — consider logging and returning 500.
        db.session.rollback()
    finally:
        db.session.close()
def get_mime_type(file_path):
    """Detect the file's MIME type, defaulting to text/plain when detection
    yields nothing."""
    detector = magic.Magic(mime=True)
    return detector.from_file(file_path) or "text/plain"
def mime_type_from_file(self, file):
    """Sniff the MIME type from the first 1 KiB of *file*, then rewind it.

    The charset suffix (everything after ';') is stripped from the result.
    """
    header = file.read(1024)
    file.seek(0)
    raw = magic.Magic(mime=True).from_buffer(header)
    return raw.decode('utf-8').split(';')[0]
def main():
    """Entry point for the Noriben VM sandbox driver.

    Parses command-line options, configures globals (VM paths, snapshot,
    timeout), builds a libmagic handle for file-type triage, then runs either
    a single file or every PE32 executable found under a directory inside the
    analysis VM.
    """
    global debug
    global timeoutSeconds
    global VM_SNAPSHOT
    global VMRUN
    global VMX
    global dontrun
    global errorCount
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', help='filename', required=False)
    parser.add_argument('-d', '--debug', dest='debug', action='store_true',
                        help='Show all commands for debugging', required=False)
    parser.add_argument('-t', '--timeout',
                        help='Number of seconds to collect activity',
                        required=False, type=int)
    parser.add_argument('-x', '--dontrun', dest='dontrun', action='store_true',
                        help='Do not run file', required=False)
    parser.add_argument('-xx', '--dontrunnothing', dest='dontrunnothing',
                        action='store_true', help='Run nothing', required=False)
    parser.add_argument('--dir',
                        help='Run all executables from a specified directory',
                        required=False)
    parser.add_argument('--recursive', action='store_true',
                        help='Recursively process a directory', required=False)
    parser.add_argument(
        '--magic',
        help='Specify file magic database (may be necessary for Windows)',
        required=False)
    parser.add_argument('--nolog', action='store_true',
                        help='Do not extract logs back', required=False)
    parser.add_argument('--norevert', action='store_true',
                        help='Do not revert to snapshot', required=False)
    parser.add_argument('--post', help='post-execution script', required=False)
    parser.add_argument('--raw', action='store_true',
                        help='Remove ProcMon filters', required=False)
    parser.add_argument('--update', action='store_true',
                        help='Update Noriben.py in guest', required=False)
    parser.add_argument('--screenshot', action='store_true',
                        help='Take screenshot after execution (PNG)',
                        required=False)
    parser.add_argument('--skip', action='store_true',
                        help='Skip already executed files', required=False)
    parser.add_argument('-s', '--snapshot',
                        help='Specify VM Snapshot to revert to', required=False)
    parser.add_argument('--vmx', help='Specify VM VMX', required=False)
    parser.add_argument('--ignore',
                        help='Ignore files or folders that contain this term',
                        required=False)
    parser.add_argument('--nonoriben', action='store_true',
                        help='Do not run Noriben in guest, just malware',
                        required=False)  # Do not run Noriben script
    parser.add_argument(
        '--os',
        help='Specify Windows or Mac for that specific vmrun path',
        required=False)
    parser.add_argument(
        '--defense', action='store_true',
        help='Extract Carbon Black Defense log to host', required=False
    )  # Particular to Carbon Black Defense. Use as example of adding your own files
    args = parser.parse_args()
    # A target (file or directory) is mandatory.
    if not args.file and not args.dir:
        print(
            '[!] A filename or directory name are required. Run with --help for more options'
        )
        sys.exit(1)
    if args.recursive and not args.dir:
        print('[!] Directory Recursive option specified, but not a directory')
        sys.exit(1)
    if args.os:
        # Pick the vmrun binary path for the requested host OS.
        if args.os in vmrun_os:
            try:
                VMRUN = vmrun_os[args.os.lower()]
            except KeyError:
                print('[!] Unable to find vmrun entry for value: {}'.format(
                    args.os))
                sys.exit(1)
        else:
            print('[!] Unable to find vmrun entry for value: {}'.format(
                args.os))
            sys.exit(1)
    if not file_exists(VMRUN):
        print('[!] Path to vmrun does not exist: {}'.format(VMRUN))
        sys.exit(1)
    if args.debug:
        debug = True
    # Build the libmagic handle, with an optional explicit magic database.
    try:
        if args.magic and file_exists(args.magic):
            magicHandle = magic.Magic(magic_file=args.magic)
        else:
            magicHandle = magic.Magic()
    except magic.MagicException as err:
        # NOTE(review): on this path magicHandle stays unbound; the later
        # getMagic(magicHandle, ...) calls would raise NameError in
        # single-file mode. Verify upstream.
        dontrun = True
        if err.message == b'could not find any magic files!':
            print(
                '[!] Windows Error: magic files not in path. See Dependencies on:',
                'https://github.com/ahupp/python-magic')
            print(
                '[!] You may need to manually specify magic file location using --magic'
            )
        print('[!] Error in running magic against file: {}'.format(err))
        if args.dir:
            print(
                '[!] Directory mode will not function without a magic database. Exiting'
            )
            sys.exit(1)
    if args.dontrun:
        dontrun = True
    if args.snapshot:
        VM_SNAPSHOT = args.snapshot
    if args.vmx:
        if file_exists(os.path.expanduser(args.vmx)):
            VMX = os.path.expanduser(args.vmx)
    if args.timeout:
        timeoutSeconds = args.timeout
    # Single-file mode: triage with magic, then run inside the VM.
    if not args.dir and args.file and file_exists(args.file):
        magicResult = getMagic(magicHandle, args.file)
        # Non-PE32 files (or DLLs) are not auto-executed.
        if magicResult and (not magicResult.startswith('PE32')
                            or 'DLL' in magicResult):
            if 'DOS batch' not in magicResult:
                dontrun = True
                print(
                    '[*] Disabling automatic running due to magic signature: {}'
                    .format(magicResult))
        run_file(args, magicResult, args.file)
    # Directory mode: collect candidate files, then run each PE32 executable.
    if args.dir:  # and file_exists(args.dir):
        errorCount = 0
        files = list()
        # sys.stdout = io.TextIOWrapper(sys.stdout.detach(), sys.stdout.encoding, 'replace')
        for result in glob.iglob(args.dir):
            for (root, subdirs, filenames) in os.walk(result):
                for fname in filenames:
                    ignore = False
                    if args.ignore:
                        for item in args.ignore.split(','):
                            if item.lower() in root.lower() or item.lower(
                            ) in fname.lower():
                                ignore = True
                    if not ignore:
                        files.append(os.path.join(root, fname))
                if not args.recursive:
                    break
        for filename in files:
            if errorCount >= errorTolerance:
                print('[!] Too many errors encountered in this run. Exiting.')
                sys.exit(100)
            # TODO: This is HACKY. MUST FIX SOON
            if args.skip and file_exists(filename + '_NoribenReport.zip'):
                print('[!] Report already run for file: {}'.format(filename))
                continue
            # Front load magic processing to avoid unnecessary calls to run_file
            magicResult = getMagic(magicHandle, filename)
            if magicResult and magicResult.startswith(
                    'PE32') and 'DLL' not in magicResult:
                if debug:
                    print('{}: {}'.format(filename, magicResult))
                execTime = time.time()
                run_file(args, magicResult, filename)
                execTimeDiff = time.time() - execTime
                print('[*] Completed. Execution Time: {}'.format(execTimeDiff))
import StringIO import os.path from avoplot.plugins import AvoPlotPluginSimple from avoplot.series import XYDataSeries from avoplot.persist import PersistentStorage from column_selector import TxtFileDataSeriesSelectFrame #from avoplot.plugins.avoplot_fromfile_plugin.loader import FileLoaderBase import loader try: import magic have_magic = True try: magic.Magic() except Exception, e: warnings.warn(("Your python-magic installation seems to be broken. " "Error message was \'%s\'. Using mimetypes module instead."%e.args)) have_magic = False except ImportError: have_magic = False #required otherwise plugin will not be loaded! plugin_is_GPL_compatible = True def tuple_compare(first, second, element=0): """
class ScannerSpider(BaseScannerSpider):
    """A spider which uses a scanner to scan all data it comes across."""

    name = 'scanner'
    # Class-level libmagic MIME sniffer shared by all spider instances.
    magic = magic.Magic(mime=True)

    def __init__(self, scanner, runner, *a, **kw):
        """Initialize the ScannerSpider with a Scanner object.

        The configuration will be loaded from the Scanner.
        """
        super().__init__(scanner=scanner, *a, **kw)
        self.runner = runner
        self.start_urls = []
        self.crawl = False
        self.do_last_modified_check = False
        self.setup_spider()

    def setup_spider(self):
        # Subclasses must populate start_urls / crawl settings.
        raise NotImplementedError

    def start_requests(self):
        """Return requests for all starting URLs AND sitemap URLs."""
        raise NotImplementedError

    def parse(self, response):
        """Process a response and follow all links."""
        raise NotImplementedError

    def handle_error(self, failure):
        """Handle an error due to a non-success status code or other reason.

        If link checking is enabled, saves the broken URL and referrers.
        """
        raise NotImplementedError

    def broken_url_save(self, status_code, status_message, url):
        """Persist a broken URL (with cleaned status message) for this scan."""
        logging.info("Handle Error: %s %s" % (status_message, url))
        # Strip a leading "[...] " tag from the failure message, then
        # capitalize it for display.
        status_message = regex.sub("\[.+\] ", "", status_message)
        status_message = capitalize_first(status_message)
        # Add broken URL
        broken_url = Url(url=url,
                         scan=self.scanner.scan_object,
                         status_code=status_code,
                         status_message=status_message)
        secure_save(broken_url)
        return broken_url

    def scan(self, response):
        """Scan a response, returning any matches."""
        mime_type = self.get_mime_type(response)
        # Save the URL item to the database
        # Skip OCR-requiring content when OCR is disabled for this scan.
        if (Processor.mimetype_to_processor_type(mime_type) == 'ocr'
                and not self.scanner.scan_object.do_ocr):
            # Ignore this URL
            return
        url_object = self.url_save(mime_type, response.request.url)
        data = response.body
        self.scanner.scan(data, url_object)

    def url_save(self, mime_type, url):
        """Create and save a Url row for this scan; return it."""
        url_object = Url(url=url, mime_type=mime_type,
                         scan=self.scanner.scan_object)
        url_object.save()
        return url_object

    def get_mime_type(self, response):
        """Resolve the response's MIME type: Content-Type header first, then
        the URL's extension, then libmagic sniffing of the body."""
        content_type = response.headers.get('content-type')
        if content_type:
            mime_type = parse_content_type(content_type)
        else:
            mime_type, encoding = mimetypes.guess_type(response.url)
            if not mime_type:
                try:
                    mime_type = self.magic.from_buffer(response.body)
                except MagicException as me:
                    logging.error(me)
        return mime_type

    def check_encoding(self, mime_type, response):
        """Decode the response body, falling back from the declared encoding
        to chardet detection, and finally to raw bytes with a re-sniffed MIME
        type. Returns (data, mime_type)."""
        if hasattr(response, "encoding"):
            try:
                data = response.body.decode(response.encoding)
            except UnicodeDecodeError:
                try:
                    # Encoding specified in Content-Type header was wrong, try
                    # to detect the encoding and decode again
                    encoding = chardet.detect(response.body).get('encoding')
                    if encoding is not None:
                        data = response.body.decode(encoding)
                        logging.warning(
                            ("Error decoding response as %s. " +
                             "Detected the encoding as %s.") %
                            (response.encoding, encoding))
                    else:
                        mime_type = self.magic.from_buffer(response.body)
                        data = response.body
                        logging.warning(
                            ("Error decoding response as %s. " +
                             "Detected the mime " + "type as %s.") %
                            (response.encoding, mime_type))
                except UnicodeDecodeError:
                    # Could not decode with the detected encoding, so assume
                    # the file is binary and try to guess the mimetype from
                    # the file
                    mime_type = self.magic.from_buffer(response.body)
                    data = response.body
                    logging.error(
                        ("Error decoding response as %s. Detected the "
                         "mime type as %s.") %
                        (response.encoding, mime_type))
        else:
            data = response.body
        return data, mime_type
def GetUrls(self, keyword_to_search, start, limit, extensions=None):
    """Scrape Google Images for each keyword and download matching images.

    For every word: fetch the search results page, walk the raw HTML for
    "https://..." URLs with an allowed extension, skip URLs already in the
    database, download each image, verify its real type with libmagic, and
    save it under a per-keyword directory. Returns (links, keyword_to_search).

    NOTE(review): exceptions (bare `raise`, ValueError) are used for per-item
    control flow inside the loop, and `all` shadows the builtin.
    """
    if extensions is None:
        extensions = {'.jpg', '.png', '.ico', '.gif', '.jpeg'}
    links = []
    # Known non-result URLs that must never be downloaded.
    spam = ('https://www.google.com/logos',
            'https://www.google.com/favicon.ico')
    for word in keyword_to_search:
        try:
            os.makedirs(word)
            print('Directory Created')
        except OSError as e:
            # errno 17 == EEXIST: directory already present is fine.
            if e.errno != 17:
                print('error')
                raise
        print('starting url')
        url = f'https://www.google.com/search?q={word}' \
              f'&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:' \
              f'1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'
        print(url)
        download_page_time = time.time()
        raw_html = self.DownloadPage(url)
        print('DOWNLOAD PAGE TIME:', time.time() - download_page_time)
        end_object = -1
        j = 0
        a = -1
        while j < limit:
            print('-------Starting-------')
            if j < start:
                # Skip results before the requested start index.
                j += 1
            else:
                raw_html_time = time.time()
                # Scan forward through the raw HTML for the next quoted
                # https URL whose text contains an allowed extension.
                while True:
                    try:
                        new_line = raw_html.find('"https://', end_object + 1)
                        end_object = raw_html.find('"', new_line + 1)
                        buffor = raw_html.find('\\', new_line + 1, end_object)
                        if buffor != -1:
                            object_raw = (raw_html[new_line + 1:buffor])
                        else:
                            object_raw = (raw_html[new_line + 1:end_object])
                        if any(extension in object_raw
                               for extension in extensions):
                            break
                    except Exception as e:
                        print(e)
                        break
                # NOTE(review): `all` shadows the builtin of the same name.
                all = self.DatabaseInside(object_raw)
                print(f'{len(all)} in Database.')
                if len(all) == 0:
                    print('RAW HTML FIND TIME', time.time() - raw_html_time)
                    photo_url_time = time.time()
                    path = word.replace(" ", "_")
                    print(path)
                    try:
                        print(f'Photo {j} of {word} starting....')
                        r = requests.get(object_raw, allow_redirects=True,
                                         timeout=0.3)
                        # Skip responses that look like HTML error pages.
                        if 'html' not in str(r.content):
                            # Derive the true extension from the content's
                            # MIME type, not from the URL.
                            mime = magic.Magic(mime=True)
                            file_type = mime.from_buffer(r.content)
                            file_extension = f'.{file_type.split("/")[1]}'
                            if file_extension not in extensions:
                                raise ValueError()
                            if j == 0 or object_raw in spam:
                                j += 1
                                raise
                            links.append(object_raw)
                            print("PHOTO URL TIME",
                                  time.time() - photo_url_time)
                            file_creating = time.time()
                            a += 1
                            dir_direction = str(word) + "_" + str(
                                a) + file_extension
                            with open(os.path.join(path, dir_direction),
                                      'wb') as file:
                                file.write(r.content)
                            self.DatabaseConnect(object_raw, word)
                            print(f'Photo {j} of {word} COMPLETED.GUT')
                            print("FILE CREATING TIME",
                                  time.time() - file_creating)
                    except Exception as e:
                        # Failed download/validation: retry this slot.
                        print("EXCEPTION", e)
                        j -= 1
                    j += 1
    return links, keyword_to_search
def __interpretfile(self, thisfile):
    """File opening heuristics.

    First, assume that files in each folder are indicative of their
    relative type. Images are in the image folder, for instance.

    Secondly, assume that non-media folders follow the "news entity"
    format of title-line, keywords-line, and then body.

    Prove these heuristics with a Python file-type check. Anything
    that doesn't pass muster returns "wrongtype".
    """
    magi = magic.Magic(mime=True)
    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    base_path = card_root + "/" + self.config.get("paths", self.ctype)
    fpath = base_path + "/" + thisfile
    # Hidden cards live in a "hidden/" subdirectory of the type folder.
    if self.hidden is True:
        fpath = base_path + "/hidden/" + thisfile
    try:
        with open(fpath, 'r', encoding='utf-8') as cfile:
            ftype = magi.from_file(fpath)
            # News entries or features are processed the same way
            if (("text" in ftype) and
                    ((self.config.get("paths", "news") in cfile.name) or
                     (self.config.get("paths", "heading") in cfile.name) or
                     (self.config.get("paths", "quotes") in cfile.name) or
                     (self.config.get("paths", "topics") in cfile.name) or
                     (self.config.get("paths", "features") in cfile.name))):
                # Line 1 = title, line 2 = comma-separated topics, rest = body.
                self.title = cfile.readline().replace("\n", "")
                rawtopics = cfile.readline().replace("\n", "")
                for item in rawtopics.split(', '):
                    self.topics.append(item)
                self.body = cfile.read()
            # Multiple-song playlists
            if (("text" in ftype) and
                    (self.config.get("paths", "songs") in cfile.name)):
                self.title = fpath
                self.topics.append("Song Playlist")
                self.body = cfile.read()
                self.__songfiles()  # Read song metadata
            # Single-image cards
            if ((("jpeg" in ftype) or ("png" in ftype)) and
                    (self.config.get("paths", "images") in cfile.name)):
                # TODO: alt/img metadata
                self.title = fpath
                self.topics.append("Images")
                self.body = fpath
            # Single-song orphan cards
            if ((("mpeg" in ftype) and ("layer iii" in ftype)) and
                    (self.config.get("paths", "songs") in cfile.name)):
                self.title = fpath  # TODO: filename from title
                self.topics.append("Songs")  # TODO: include the album
                self.body = fpath
                self.__songfiles()  # Read song metadata
        # If the filename is in unix-time format, track the creation date
        if thisfile.isdigit():
            # 1141161200 ~ March 2006: treat smaller ints as non-timestamps.
            if int(thisfile) > 1141161200:
                self.cdate = datetime.fromtimestamp(
                    int(thisfile)).strftime("%B %-d, %Y")
        else:
            # Fall back to the file's modification time.
            fnmtime = os.path.getmtime(fpath)
            self.cdate = datetime.fromtimestamp(
                int(fnmtime)).strftime("%B %-d, %Y")
    except IOError:
        # File got moved in between dirlist caching and us reading it
        self.topics = []  # Makes the card go away if we had an error reading content
        return self.config.get("card_defaults", "file")
    if self.hidden is True:
        return self.config.get("paths", self.ctype) + "/hidden/" + thisfile
    else:
        return self.config.get("paths", self.ctype) + "/" + thisfile
def ticket_protocol(user,
                    subject,
                    structure_configuration=None,
                    configuration=None,
                    file_name='test_name',
                    response=b'',
                    attachments_folder=settings.MEDIA_ROOT,
                    attachments_dict=None,
                    test=False):
    """Register ("protocolla") a ticket in the external protocol web service.

    :param user: user whose identity data is sent to the protocol WS
    :param subject: protocol subject line
    :param structure_configuration: per-structure protocol credentials
    :param configuration: protocol archival settings (titolario/fascicolo)
    :param file_name: base name for the main PDF document
    :param response: main document content (PDF bytes)
    :param attachments_folder: folder under MEDIA_ROOT holding attachments
    :param attachments_dict: optional mapping of attachment keys to file names
    :param test: if True, use the test credentials from settings
    :returns: the protocol number assigned by the web service
    :raises Exception: if production mode lacks a complete configuration
    :raises AssertionError: if the web service returns no protocol number
    """
    # Fix: the original signature used a mutable default (attachments_dict={});
    # normalize None to a fresh dict on every call instead.
    if attachments_dict is None:
        attachments_dict = {}

    valid_conf = structure_configuration and configuration

    # Test mode: exercise the protocol system with settings credentials only.
    if test:
        prot_url = settings.PROT_URL
        prot_login = settings.PROT_TEST_LOGIN
        prot_passw = settings.PROT_TEST_PASSW
        prot_aoo = settings.PROT_TEST_AOO
        prot_agd = settings.PROT_AGD_DEFAULT
        prot_uo = settings.PROT_UO_DEFAULT
        prot_email = settings.PROT_EMAIL_DEFAULT
        prot_titolario = settings.PROT_TITOLARIO_DEFAULT
        prot_fascicolo_num = settings.PROT_FASCICOLO_DEFAULT
        prot_fascicolo_anno = settings.PROT_FASCICOLO_ANNO_DEFAULT
        prot_template = settings.PROTOCOL_XML
    # Production: credentials come from the structure configuration.
    elif not test and valid_conf:
        prot_url = settings.PROT_URL
        prot_login = structure_configuration.protocollo_username
        prot_passw = structure_configuration.protocollo_password
        prot_aoo = structure_configuration.protocollo_aoo
        prot_agd = structure_configuration.protocollo_agd
        prot_uo = structure_configuration.protocollo_uo
        prot_email = structure_configuration.protocollo_email or settings.PROT_EMAIL_DEFAULT
        prot_titolario = configuration.protocollo_cod_titolario
        prot_fascicolo_num = configuration.protocollo_fascicolo_numero
        prot_fascicolo_anno = configuration.protocollo_fascicolo_anno
        prot_template = settings.PROTOCOL_XML
    # For production a custom configuration is necessary.
    elif not test and not valid_conf:
        raise Exception(_('Missing XML configuration for production'))

    protocol_data = {
        'oggetto': '{}'.format(subject),
        'id_persona': user.taxpayer_id,
        'nome_persona': user.first_name,
        'cognome_persona': user.last_name,
        # protocol creation attributes
        'aoo': prot_aoo,
        'agd': prot_agd,
        'uo': prot_uo,
        'email': prot_email,
        'id_titolario': prot_titolario,
        'fascicolo_numero': prot_fascicolo_num,
        'fascicolo_anno': prot_fascicolo_anno
    }

    # The concrete protocol client class is configurable via settings.
    protclass = __import__(settings.CLASSE_PROTOCOLLO, globals(), locals(),
                           ['*'])
    wsclient = protclass.Protocollo(wsdl_url=prot_url,
                                    username=prot_login,
                                    password=prot_passw,
                                    template_xml_flusso=prot_template,
                                    strictly_required=True,
                                    **protocol_data)

    logger.info('Protocollazione richiesta {}'.format(subject))

    # Main document (the ticket PDF).
    docPrinc = BytesIO()
    docPrinc.write(response)
    docPrinc.seek(0)
    wsclient.aggiungi_docPrinc(docPrinc,
                               nome_doc="{}.pdf".format(file_name),
                               tipo_doc='uniTicket request')

    # Attachments: sniff each file's MIME type and add it to the flow.
    if attachments_dict:
        mime = magic.Magic(mime=True)  # hoisted out of the loop
        for k, v in attachments_dict.items():
            file_path = '{}/{}/{}'.format(settings.MEDIA_ROOT,
                                          attachments_folder,
                                          v)
            content_type = mime.from_file(file_path)
            # Fix: close the file deterministically (the original used a
            # bare open()/close() pair without try/finally).
            with open(file_path, 'rb') as f:
                attachment_response = HttpResponse(f.read(),
                                                   content_type=content_type)
            attachment_response[
                'Content-Disposition'] = 'inline; filename=' + v
            allegato = BytesIO()
            allegato.write(attachment_response.content)
            allegato.seek(0)
            wsclient.aggiungi_allegato(nome=v,
                                       descrizione=subject,
                                       fopen=allegato)

    logger.debug(wsclient.render_dataXML())
    wsclient.protocolla()

    # raise exception if wsclient hasn't a protocol number
    # NOTE(review): assert is stripped under `python -O`; consider an
    # explicit raise if this check must survive optimized runs.
    assert wsclient.numero
    return wsclient.numero
"The remote server couldn't fulfill the request, HTTP error code %s" % e.code }, context_instance=RequestContext( request)) # Store temp file. url_temp = NamedTemporaryFile(delete=True) url_temp.write(url.read()) url_temp.flush() # Convert to File object. url_file = File(url_temp).name # Check content type. mime = magic.Magic(mime=True) content_type = mime.from_file(url_file) if not check_allowed_content(content_type): return render_to_response( "error.html", {"error": "File type not supported"}, context_instance=RequestContext(request)) # Create analysis task. task = Analysis() task.owner = request.user task.case = case task.file_name = os.path.basename( urlparse.urlparse(request.POST.get("url")).path) task.image_id = save_file(file_path=url_file, content_type=content_type) task.thumb_id = create_thumb(url_file)
def extract_metadata(file_name, path, pass_fail=False, lda_preamble=False, null_inference=False):
    """Create metadata JSON from file.

    :param file_name: (str) file name
    :param path: (str) absolute or relative path to file (assumed to end
        with a path separator, since it is concatenated with file_name)
    :param pass_fail: (bool) whether to exit after ascertaining file class
    :param lda_preamble: (bool) whether to attempt LDA topic modeling on a
        preamble of columnar files
    :param null_inference: (bool) whether to run null-value inference on
        columnar files
    :returns: (dict) metadata dictionary"""
    # Fix: 'rU' mode was deprecated and removed in Python 3.11; plain text
    # mode already performs universal-newline translation.
    with open(path + file_name, 'r') as file_handle:
        extension = file_name.split(
            '.')[-1] if '.' in file_name else "no extension"
        mime = magic.Magic(mime=True)
        mime_type = mime.from_file(path + file_name)
        metadata = {
            "system": {
                "file": file_name,
                "path": path,
                "extension": extension,
                "mime_type": mime_type,
                "size": os.path.getsize(path + file_name),
                # Fix: hashlib requires bytes on Python 3 — the original
                # passed the str from a text-mode read (TypeError).
                # NOTE(review): text-mode newline translation means the
                # checksum is over normalized text, not raw bytes.
                "checksum": sha256(file_handle.read().encode('utf-8')).hexdigest(),
                "extractors": []
            }
        }
        # checksum puts cursor at end of file - reset to beginning for
        # metadata extraction
        file_handle.seek(0)

        # files with a numeric fraction below this may be abstract-like text
        text_frac_num = 0.2

        # NOTE(review): this also matches extensions merely containing "nc"
        # (e.g. "inc"); kept as-is to preserve behavior.
        if "nc" in extension.lower():
            try:
                metadata.update(
                    extract_netcdf_metadata(file_handle, pass_fail=pass_fail))
                metadata["system"]["extractors"].append("netcdf")
            except ExtractionPassed:
                metadata["system"]["extractors"].append("netcdf")
            except ExtractionFailed:
                # not a netCDF file
                pass
        elif any(i in mime_type for i in ["text", "csv", "xml"]):
            try:
                # Fix: forward the caller's flags — the original hard-coded
                # lda_preamble=True, null_inference=False, silently ignoring
                # the parameters. Default calls now honor the declared
                # defaults (lda_preamble=False).
                metadata.update(
                    extract_columnar_metadata(file_handle,
                                              pass_fail=pass_fail,
                                              lda_preamble=lda_preamble,
                                              null_inference=null_inference))
                metadata["system"]["extractors"].append("columnar")
                # check if LDA was performed successfully on a preamble
                if "topics" in metadata.keys():
                    metadata["system"]["extractors"].append("lda")
            except ExtractionPassed:
                metadata["system"]["extractors"].append("columnar")
            except ExtractionFailed:
                # not a columnar file
                # check if this file is a usable abstract-like file
                if frac_numeric(file_handle) < text_frac_num:
                    try:
                        metadata.update(
                            extract_topic(file_handle, pass_fail=pass_fail))
                        metadata["system"]["extractors"].append("lda")
                    except ExtractionPassed:
                        metadata["system"]["extractors"].append("lda")
                    except ExtractionFailed:
                        pass
    return metadata
def load_index(args, queue_logger):
    """
    Function to load the genes and positions from the indexed GFF
    ("midx" SQLite database).

    :param args: namespace holding the reference file handle plus the
        exclude_utr / protein_coding flags forwarded to Gene.load_dict
    :param queue_logger: logger instance
    :return: genes, positions
    :rtype: ((None|collections.defaultdict),(None|collections.defaultdict))
    :raises CorruptIndex: on any structural or content problem with the index
    """
    index_name = "{0}.midx".format(args.reference.name)
    # Fix: python-magic's Magic(mime=True).from_file returns a str, but the
    # original compared it against the bytes literal b"application/gzip",
    # which can never be equal — so obsolete gzip-format indices were never
    # detected. Normalize to str before comparing (also tolerates libraries
    # that return bytes).
    wizard = magic.Magic(mime=True)
    mime_type = wizard.from_file(index_name)
    if isinstance(mime_type, bytes):
        mime_type = mime_type.decode()
    if mime_type == "application/gzip":
        queue_logger.warning(
            "Old index format detected. Starting to generate a new one.")
        raise CorruptIndex("Invalid index file")

    # Sanity-check the SQLite database: expected tables and integrity.
    try:
        conn = sqlite3.connect(index_name)
        cursor = conn.cursor()
        tables = cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table';").fetchall()
        if sorted(tables) != sorted([("positions", ), ("genes", )]):
            raise CorruptIndex("Invalid database file")
        # Integrity check
        res = cursor.execute("PRAGMA integrity_check;").fetchone()
        if res[0] != "ok":
            raise CorruptIndex("Corrupt database, integrity value: {}".format(
                res[0]))
    except sqlite3.DatabaseError:
        raise CorruptIndex("Invalid database file")

    # chrom -> {(start, end): [gene ids]}
    positions = dict()
    try:
        for chrom, start, end, gid in cursor.execute(
                "SELECT * from positions"):
            if chrom not in positions:
                positions[chrom] = collections.defaultdict(list)
            positions[chrom][(start, end)].append(gid)
    except sqlite3.DatabaseError:
        raise CorruptIndex("Invalid index file. Rebuilding.")

    # gid -> deserialized Gene (msgpack payload stored in the "genes" table)
    genes = dict()
    for gid, obj in cursor.execute("SELECT * from genes"):
        try:
            gene = Gene(None, logger=queue_logger)
            gene.load_dict(msgpack.loads(obj, raw=False),
                           exclude_utr=args.exclude_utr,
                           protein_coding=args.protein_coding,
                           trust_orf=True)
            if len(gene.transcripts) > 0:
                genes[gid] = gene
            else:
                queue_logger.warning("No transcripts for %s", gid)
        except (EOFError, json.decoder.JSONDecodeError) as exc:
            queue_logger.exception(exc)
            raise CorruptIndex("Invalid index file")
        except (TypeError, ValueError) as exc:
            queue_logger.exception(exc)
            raise CorruptIndex(
                "Corrupted index file; deleting and rebuilding.")

    # Fix: release the database handle (the original never closed it).
    conn.close()
    return genes, positions
def run(self):
    """Serve HTTP requests on self.conn until the client disconnects.

    Minimal hand-rolled HTTP/1.1 handler: parses the request line and a
    few headers, supports GET/HEAD, byte ranges (206/416), ETag-based
    304 responses, virtual-host lookup (404 for unknown domains), and
    appends an access/error log line per request.
    """
    with self.conn:
        while True:
            # Accumulate one request; a short recv (< 1024) or a timeout
            # marks the end of the incoming data.
            request = b''
            while True:
                try:
                    self.conn.settimeout(5)
                    currRequest = self.conn.recv(1024)
                except Exception:
                    break
                request += currRequest
                if len(currRequest) < 1024:
                    break
            # Empty request means the peer closed the connection.
            if request == b'':
                break
            decodedData = request.decode()
            arr = decodedData.split('\r\n')
            # Request line: "<METHOD> <PATH> <VERSION>"
            methodArr = arr[0].split(' ')
            method = methodArr[0]
            myFile = methodArr[1].split('?')[0]  # strip query string
            myFile = myFile.replace('%20', ' ')  # crude %20 unescaping only
            date = datetime.now()
            date_time = date.strftime("%a %b %-d %H:%M:%S %Y")
            userIp = self.addr[0]
            domain = ''
            userAgent = ''
            connection = ''  # parsed but currently unused
            rangeVal = ''
            etagVal = ''
            # Header extraction; offsets skip "<Name>: " prefixes.
            for header in arr:
                if header.find('Host:') != -1:
                    domain = header[header.find('Host:') + 6:]
                    domain = domain.split(':')[0]  # drop :port
                elif header.find('User-Agent:') != -1:
                    userAgent = header[header.find('User-Agent:') + 12:]
                elif header.find('Connection:') != -1:
                    connection = header[header.find('Connection:') + 12:]
                elif header.find('Range: bytes=') != -1:
                    rangeVal = header[header.find('Range: bytes=') + 13:]
                elif header.find('host:') != -1:
                    domain = header[header.find('host:') + 6:]
                    domain = domain.split(':')[0]
                elif header.find('If-None-Match:') != -1:
                    etagVal = header[header.find('If-None-Match:') + 15:]
            # Re-purpose "header" as the outgoing response header buffer.
            header = ''
            currResponse = b''
            # Virtual-host check: unknown domain -> 404.
            if not domain in self.checkDomain[(self.ip, self.port)]:
                header = 'HTTP/1.1 404 Not Found\r\n'
                currResponse = 'REQUESTED DOMAIN NOT FOUND\r\n'
                # NOTE(review): terminator is '\n\n' rather than the
                # HTTP-standard '\r\n\r\n' — confirm clients accept this.
                header += 'Content-Length: ' + str(
                    len(currResponse)) + '\n\n'
                writeIntoLog(self.log + '/error.log', date_time, userIp,
                             domain, myFile, '404',
                             str(len(currResponse)), userAgent)
                self.conn.send((header + currResponse).encode())
                continue
            statusCode = '200'
            try:
                mfile = open(self.documentRoots[domain] + myFile, 'rb')
                currResponse = mfile.read()
                mfile.close()
                # Range requests: "start-end" or "start-" -> 206 slice,
                # invalid start>end -> 416.
                if rangeVal != '':
                    currArr = rangeVal.split('-')
                    statusCode = '206'
                    if currArr[1] != '':
                        if int(currArr[0]) > int(currArr[1]):
                            header = 'HTTP/1.1 416 Requested Range Not Satisfiable status\n\n'
                            writeIntoLog(self.log + '/error.log', date_time,
                                         userIp, domain, myFile, '416',
                                         str(len(currResponse)), userAgent)
                            self.conn.send(header.encode())
                            continue
                        currResponse = currResponse[int(currArr[0]
                                                        ):int(currArr[1]) + 1]
                    else:
                        currResponse = currResponse[int(currArr[0]):]
                mime = magic.Magic(mime=True)
                mimetype = str(
                    mime.from_file(self.documentRoots[domain] + myFile))
                # Browser workaround: force text/css for .css so stylesheets
                # load in HTML pages (libmagic often reports text/plain).
                if userAgent.find('Chrome') != -1 or userAgent.find(
                        'Mozilla') != -1:
                    if myFile.endswith('.css'):
                        mimetype = 'text/css'
                # ETag is the Python hash of the (possibly sliced) body;
                # match -> 304 with empty body.
                if etagVal == str(currResponse.__hash__()):
                    header = 'HTTP/1.1 304 Not Modified \n\n'
                    writeIntoLog(self.log + '/' + domain + '.log', date_time,
                                 userIp, domain, myFile, '304', '0',
                                 userAgent)
                    self.conn.send(header.encode())
                    continue
                # NOTE(review): reason phrase is always "OK", even for 206.
                header = 'HTTP/1.1 ' + statusCode + ' OK\r\n'
                header += 'Host:' + domain + '\r\n'
                header += 'Server: My_Server\r\n'
                header += 'Date: ' + date_time + '\r\n'
                header += 'Content-Length: ' + str(
                    len(currResponse)) + '\r\n'
                header += 'Content-Type: ' + mimetype + '\r\n'
                header += 'ETag: ' + str(currResponse.__hash__()) + ' \r\n'
                header += 'Accept-Ranges: bytes\r\n'
                header += 'Connection: keep-alive\r\n'
                header += 'Keep-Alive: timeout=5\n\n'
            except Exception as e:
                # NOTE(review): on error (e.g. missing file) this only prints
                # and falls through with whatever header/currResponse hold.
                print(e)
            writeIntoLog(self.log + "/" + domain + ".log", date_time, userIp,
                         domain, myFile, statusCode,
                         str(len(currResponse)), userAgent)
            # NOTE(review): methods other than GET/HEAD leave `response`
            # unbound and would raise NameError at sendall — confirm intent.
            if method == 'GET':
                response = header.encode()
                response += currResponse
            elif method == 'HEAD':
                response = header.encode()
            self.conn.sendall(response)
def test_mime_encodings(self): m = magic.Magic(mime_encoding=True) self.assert_values(m, { 'text-iso8859-1.txt': 'iso-8859-1', 'text.txt': 'us-ascii', })
def dataurl(blob): f = magic.Magic(mime=True, uncompress=True) mime = f.from_buffer(blob) return "data:" + mime + ";base64," + b64encode(blob).decode()
def test_from_buffer_str_and_bytes(self): m = magic.Magic(mime=True) s = '#!/usr/bin/env python\nprint("foo")' self.assertEqual("text/x-python", m.from_buffer(s)) b = b'#!/usr/bin/env python\nprint("foo")' self.assertEqual("text/x-python", m.from_buffer(b))
def raw_mimetype(self): mime = magic.Magic(mime=True) path = os.path.join(settings.MEDIA_ROOT, self.object.path) return mime.from_file(path)