Example #1
def object_file_path(obj, base_path, folder_name, filename, create_dir=False):
    filename = sanitize_filename(filename)
    storage_dir = os.path.join(base_path, obj.__tablename__, folder_name, str(obj.client_id), str(obj.object_id))
    if create_dir:
        os.makedirs(storage_dir, exist_ok=True)
    storage_path = os.path.join(storage_dir, filename)
    return storage_path, filename
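A minimal usage sketch of the helper above, assuming the same imports (`os`, `sanitize_filename`); the entity class, attribute values, and paths below are hypothetical:

class FakeSound:
    __tablename__ = "sound"
    client_id = 7
    object_id = 42

# Builds <base>/<table>/<folder>/<client>/<object>/<sanitized name> without touching disk.
storage_path, safe_name = object_file_path(FakeSound(), "/var/storage", "audio",
                                           "my:recording.wav", create_dir=False)
print(storage_path)  # e.g. "/var/storage/sound/audio/7/42/myrecording.wav"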
Example #2
def safename(fn):
    """
    returns cross-platform safe name WITHOUT directory
    """
    if isinstance(fn,Path):
        fn = fn.name
    return pathvalidate.sanitize_filename(fn,'-')
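A short usage sketch (assumes the same `pathvalidate` and `pathlib.Path` imports as the snippet above; the paths are made up):

from pathlib import Path

# Directory parts are dropped first, then reserved characters are replaced with "-".
print(safename(Path("/tmp/data/run:01?.csv")))  # e.g. "run-01-.csv"
print(safename("plain_name.txt"))               # already safe, returned unchanged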
Example #3
    def test_normal_pathlike(self, value, replace_text, expected):
        sanitized_name = sanitize_filename(value, replace_text)
        assert sanitized_name == expected
        assert is_pathlike_obj(sanitized_name)

        validate_filename(sanitized_name)
        assert is_valid_filename(sanitized_name)
Example #4
def writeplots(fg, plotprefix, tind=None, odir=None, fmt='.png', anno=None, dpi=None, facecolor=None, doclose=True):
    try:
        if fg is None or odir is None:
            return
    # %%
        draw()  # Must have this here or plot doesn't update in animation multiplot mode!
        # TIF was not faster and was 100 times the file size!
        # PGF is slow and big file,
        # RAW crashes
        # JPG no faster than PNG

        suff = nametime(tind)

        if anno:
            fg.text(0.15, 0.8, anno, fontsize='x-large')
        if pathvalidate is not None:
            cn = Path(odir).expanduser() / pathvalidate.sanitize_filename(plotprefix + suff + fmt)
        else:
            cn = Path(odir).expanduser() / (plotprefix + suff + fmt)

        print('write', cn)

        if facecolor is None:
            facecolor = fg.get_facecolor()

        fg.savefig(cn, bbox_inches='tight', dpi=dpi, facecolor=facecolor, edgecolor='none')

        if doclose:
            close(fg)

    except Exception as e:
        logging.error(f'{e}  when plotting {plotprefix}')
Example #5
File: utils.py Project: ispras/lingvodoc
def object_file_path(obj, settings, data_type, filename, create_dir=False):
    filename = sanitize_filename(filename)
    base_path = settings['storage']['path']
    storage_dir = os.path.join(base_path, obj.__tablename__, data_type, str(obj.client_id), str(obj.object_id))
    if create_dir:
        os.makedirs(storage_dir, exist_ok=True)
    storage_path = os.path.join(storage_dir, filename)

    return storage_path, filename
Example #6
File: common.py Project: thombashi/thutils
def command_to_filename(command, suffix=""):
    sep_char = "/\\"

    command = command.strip()
    filename = command.replace(" ", "_")
    filename = filename.replace("-", "")
    filename = filename.strip(sep_char).lstrip(sep_char)
    filename = re.sub("[%s]" % re.escape("/\\"), "-", filename)
    filename = pathvalidate.sanitize_filename(filename)
    if dataproperty.is_not_empty_string(suffix):
        filename += "_" + suffix

    return filename
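For illustration, two hypothetical calls (the outputs in the comments are approximate and depend on the pathvalidate version):

# spaces -> "_", "-" removed, path separators -> "-", then a final sanitize pass
print(command_to_filename("ls -la /var/log"))        # e.g. "ls_la_-var-log"
print(command_to_filename("df -h", suffix="daily"))  # e.g. "df_h_daily"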
Example #7
    def _preprocess_table_name(self):
        try:
            new_name = pv.sanitize_filename(self._tabledata.table_name, replacement_text="_")
        except TypeError:
            raise NameValidationError(
                "table name must be a string: actual='{}'".format(self._tabledata.table_name)
            )

        new_name = pv.replace_unprintable_char(new_name, replacement_text="")
        new_name = pv.replace_symbol(new_name, replacement_text="_")
        new_name = new_name.replace(" ", "_")
        new_name = re.sub("_+", "_", new_name)
        new_name = new_name.strip("_")

        return new_name
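The same normalisation chain can be sketched as a standalone helper; `pv` is `pathvalidate` (the helpers `replace_unprintable_char`/`replace_symbol` are assumed to match the version used above), and the input string is made up:

import re
import pathvalidate as pv

def normalize_table_name(raw_name: str) -> str:
    """Rough standalone sketch of the cleaning chain used above."""
    name = pv.sanitize_filename(raw_name, replacement_text="_")
    name = pv.replace_unprintable_char(name, replacement_text="")
    name = pv.replace_symbol(name, replacement_text="_")
    name = name.replace(" ", "_")
    name = re.sub("_+", "_", name)
    return name.strip("_")

print(normalize_table_name("sales report: 2021/Q1"))  # e.g. "sales_report_2021_Q1"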
Example #8
def writeplots(fg, t="", odir=None, ctxt="", ext=".png"):
    from matplotlib.pyplot import close

    if odir:
        odir = Path(odir).expanduser()
        odir.mkdir(parents=True, exist_ok=True)

        if isinstance(t, (DataArray)):
            t = datetime.fromtimestamp(t.item() / 1e9, tz=UTC)
        elif isinstance(t, (float, integer_types)):  # UTC assume
            t = datetime.fromtimestamp(t / 1e9, tz=UTC)

            #:-6 keeps up to millisecond if present.
        ppth = odir / pathvalidate.sanitize_filename(ctxt + str(t)[:-6] + ext, "-").replace(" ", "")

        print("saving {}".format(ppth))

        fg.savefig(str(ppth), dpi=100, bbox_inches="tight")

        close(fg)
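Isolating just the filename step above: colons in an ISO timestamp are invalid on Windows, so `sanitize_filename(..., "-")` keeps the name portable (the timestamp and prefix here are made up):

from datetime import datetime, timezone
import pathvalidate

t = datetime.fromtimestamp(1_500_000_000, tz=timezone.utc)
name = pathvalidate.sanitize_filename("isr" + str(t)[:-6] + ".png", "-").replace(" ", "")
print(name)  # e.g. "isr2017-07-1402-40-00.png" (":" replaced by "-", space removed)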
Example #9
def create_filepath(course, path):
    course_name = course["OrgUnit"]["Name"]
    course_name = course_prefix.sub("", course_name)
    return "/".join([sanitize_filename(course_name)] +
                    [sanitize_filename(module["Title"]) for module in path])
Example #10
 def test_normal_multibyte(self, value, replace_text, expected):
     sanitized_name = sanitize_filename(value, replace_text)
     assert sanitized_name == expected
     validate_filename(sanitized_name)
     assert is_valid_filename(sanitized_name)
Example #11
def download_id_by_type(client,
                        item_id,
                        path,
                        quality,
                        album=False,
                        embed_art=False,
                        albums_only=False):
    """
    Download and get metadata by ID and type (album or track)

    :param Qopy client: qopy Client
    :param int item_id: Qobuz item id
    :param str path: The root directory where the item will be downloaded
    :param int quality: Audio quality (5, 6, 7, 27)
    :param bool album: album type or not
    :param bool embed_art: Embed cover art into files
    :param bool albums_only: Ignore Singles, EPs and VA releases
    """
    count = 0

    if album:
        meta = client.get_album_meta(item_id)

        if albums_only and (meta.get("release_type") != "album"
                            or meta.get("artist").get("name")
                            == "Various Artists"):
            print("Ignoring Single/EP/VA: " + meta.get("title", ""))
            return

        album_title = get_title(meta)
        print("\nDownloading: {}\n".format(album_title))
        dirT = (
            meta["artist"]["name"],
            album_title,
            meta["release_date_original"].split("-")[0],
            get_format(client, meta, quality),
        )
        sanitized_title = sanitize_filename("{} - {} [{}] [{}]".format(*dirT))
        dirn = os.path.join(path, sanitized_title)
        os.makedirs(dirn, exist_ok=True)
        get_extra(meta["image"]["large"], dirn)
        if "goodies" in meta:
            try:
                get_extra(meta["goodies"][0]["url"], dirn, "booklet.pdf")
            except Exception as e:
                print("Error: " + e)
        media_numbers = [
            track["media_number"] for track in meta["tracks"]["items"]
        ]
        is_multiple = True if len([*{*media_numbers}]) > 1 else False
        for i in meta["tracks"]["items"]:
            parse = client.get_track_url(i["id"], quality)
            if "sample" not in parse and parse["sampling_rate"]:
                is_mp3 = True if int(quality) == 5 else False
                download_and_tag(
                    dirn,
                    count,
                    parse,
                    i,
                    meta,
                    False,
                    is_mp3,
                    embed_art,
                    i["media_number"] if is_multiple else None,
                )
            else:
                print("Demo. Skipping")
            count = count + 1
    else:
        parse = client.get_track_url(item_id, quality)

        if "sample" not in parse and parse["sampling_rate"]:
            meta = client.get_track_meta(item_id)
            track_title = get_title(meta)
            print("\nDownloading: {}\n".format(track_title))
            dirT = (
                meta["album"]["artist"]["name"],
                track_title,
                meta["album"]["release_date_original"].split("-")[0],
                get_format(client, meta, quality, True),
            )
            sanitized_title = sanitize_filename(
                "{} - {} [{}] [{}]".format(*dirT))
            dirn = os.path.join(path, sanitized_title)
            os.makedirs(dirn, exist_ok=True)
            get_extra(meta["album"]["image"]["large"], dirn)
            is_mp3 = True if int(quality) == 5 else False
            download_and_tag(dirn, count, parse, meta, meta, True, is_mp3,
                             embed_art)
        else:
            print("Demo. Skipping")
    print("\nCompleted\n")
Example #12
def ta_upload():

# The function below checks whether the file name has one of the allowed image extensions
    def allowed_file(filename):
        return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


# Render the basic HTML page that showcases the upload API when we get a GET request
    if request.method == 'GET':
        return render_template('users/ta_upload.html')

# Below are a number of validation checks for the API. These are done using WTForms in the main app.

    if 'file' not in request.files:
        error_msg = "Did you forget the file? "
        return Response(error_msg, status=401)

    username = request.form.get('username')
    password = request.form.get('password')
    file = request.files.get('file')

    if username is None or password is None or file is None:
        error_msg = "Welcome - Please input credentials and file"
        return Response(error_msg, status=401)

    if len(username) < 2 or len(username) > 30 or len(password) < 2 or len(password) > 30:
        error_msg = "Username or Password do not meet the length requirements"
        return Response(error_msg, status=401)


    user = User.query.filter_by(username=username).first()

    if user is None:
        error_msg = "User Not Registered!!"
        return Response(error_msg, status=401)


    if not user.check_password(password):
        error_msg = "Invalid username and password combination"
        return Response(error_msg, status=401)

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)  # checks for spaces and security flaws and fixes them
        clean_name = sanitize_filename(urllib.parse.unquote(filename)).replace(" ", "_")  # performs a similar task; used as extra insurance


        rand_name =  urandom(4).hex() +  clean_name
        # save original image
        real_name = ntpath.basename(image_uploadset.save(file, folder='originals', name=rand_name))

        print("rand_name",rand_name)
        print("real_name",real_name)

        original_path = './images/originals/'+real_name

        image_cv = cv2.imread(original_path)

        # Face detection
        ## need to tell the location of the classifier manually!!
        #face_cascade = cv2.CascadeClassifier('C:/Users/mihir/PycharmProjects/A1_ECE1779/venv/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
        #face_cascade = cv2.CascadeClassifier('/Users/bibinsebastian/Dropbox/UofT/ECE1779/A2_ECE1779/venv/lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml')
        face_cascade = cv2.CascadeClassifier('/home/ubuntu/Desktop/ece1779/A2_ECE1779/venv/lib/python3.7/site-packages/cv2/data/haarcascade_frontalface_default.xml')
        #face_cascade = cv2.CascadeClassifier('/Users/ragnoletto/Documents/School/UofT/ECE1779/assignments/A1_ECE1779/venv/lib/python3.7/site-packages/cv2/data/haarcascade_frontalface_default.xml')

        gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        face_cv = image_cv.copy()  # this is needed since drawing would throw an error if there is no face in the image

        for (x,y,w,h) in faces:
            face_cv = cv2.rectangle(image_cv, (x,y) , (x + w,y + h), (0,0,255),8)
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = face_cv[y:y+h, x:x+w]


        picture_path = './images/faces/'+real_name
        cv2.imwrite(picture_path, face_cv)
        cv2.waitKey(0)

        # try:
        #     image = Image(user_id=user.id, file_name=real_name, num_faces = len(faces))
        #     db.session.add(image)
        #     db.session.commit()
        #     success_msg = "File Upload Success"
        #     return Response(success_msg, status=201)
        # except Exception as error:
        #     db.session.rollback()
        #     return Response("DB Error, rollback", status=500)



        try:
            image = Image(user_id=user.id, file_name=real_name, num_faces = len(faces))
            db.session.add(image)
            db.session.commit()

            s3.upload_file(original_path,webapp.config["S3_BUCKET"],real_name)
            s3.upload_file(picture_path,webapp.config["S3_BUCKET"],'f_'+real_name)

            os.remove(original_path)
            os.remove(picture_path)

            success_msg = "Upload Success"
            return Response(success_msg, status=201)

        except Exception as error:
            db.session.rollback()
            return Response("DB Error, rollback", status=500)

    else:
        error_msg = "Not an Image"
        return Response(error_msg, status=401)
Example #13
def get_comment_file():
    return "%s/%s.json" % (
        COMMENTSDIR, pathvalidate.sanitize_filename(get_current_showname()))
Example #14
    def download_from_ism(self, url, output_name, output_format):
        r = self.session.get(f'{url}/manifest')
        manifest = xmltodict.parse(r.content, force_list={'StreamIndex', 'c'})
        self.logger.debug(json.dumps(manifest, indent=4))

        streams = [x for x in manifest['SmoothStreamingMedia']['StreamIndex'] if x['@Type'] == 'text']

        has_subtitles = False

        for (index, stream) in enumerate(streams):
            has_subtitles = True

            lang = stream['@Language'].lower()

            fmt = stream['QualityLevel']['@FourCC'].upper()
            if fmt != 'TTML':
                self.logger.error(f'Stream has unsupported subtitle format: {fmt!r}')
                sys.exit(1)

            index += 1
            output = f'{output_name.replace(" ", ".")}.{lang}.{index}.{output_format}'
            output = pathvalidate.sanitize_filename(output)
            output = os.path.join(self.args.output_dir, output)
            self.logger.info(f'Downloading subtitle track #{index} ({lang})')

            path = stream['@Url'].replace('{bitrate}', stream['QualityLevel']['@Bitrate'])
            t = 0
            ts = []

            for c in stream['c']:
                if c.get('@t'):
                    t = int(c['@t'])
                    ts.append(t)

                if not c.get('@d'):
                    # Stream only has a single segment
                    break

                for i in range(int(c.get('@r', 1))):
                    t += int(c['@d'])
                    ts.append(t)

            ts = ts[:-1]  # Remove nonexistent last segment

            xml = {'tt': {'body': {'div': {'p': []}}}}

            for t in tqdm(ts, unit='seg', disable=self.args.debug):
                seg_url = f'{url}/{path.replace("{start time}", str(t))}'
                seg = self.session.get(seg_url).content

                if not seg:
                    # Empty segment
                    continue

                data = self.ismt_to_ttml(seg).decode('utf-8')

                assert '{{BR}}' not in data, 'input data contains br placeholder'
                data = re.sub(r'(?i)<br\s*/?>\s*(</br>)?', '{{BR}}', data)

                xml_seg = xmltodict.parse(
                    data,
                    force_list={'p'},
                    process_namespaces=True,
                    namespaces={
                        'http://www.w3.org/XML/1998/namespace': None,
                        'http://www.w3.org/2006/10/ttaf1': None,
                        'http://www.w3.org/2006/10/ttaf1#metadata': None,
                        'http://www.w3.org/2006/10/ttaf1#styling': None,
                        'http://www.w3.org/ns/ttml': None,
                    },
                )

                if i == 0:
                    fps_base = xml_seg['tt'].get('@ttp:frameRate')
                    fps_mult = xml_seg['tt'].get('@ttp:frameRateMultiplier')

                    if xml_seg['tt']['body']['div'] is None:
                        xml_seg['tt']['body']['div'] = {'p': []}

                    if fps_base:
                        if fps_mult:
                            mult = [int(x) for x in fps_mult.split(' ')]
                            mult = truediv(*mult)
                        else:
                            mult = 1

                        fps = float(fps_base) * mult
                    else:
                        fps = 30  # Per TTML spec

                div = xml_seg['tt']['body']['div']

                if div is None:
                    # Empty subtitle file
                    continue

                subs = div.get('p', [])

                scale = int(stream['@TimeScale'])
                offset = t / scale

                for p in subs:
                    for a in ('@begin', '@end'):
                        tc = p[a]
                        if '.' in tc:
                            (h, m, s) = [float(x) for x in tc.split(':')]
                            f = 0
                        else:
                            (h, m, s, f) = [int(x) for x in tc.split(':')]
                        total = round(h*3600 + m*60 + s + f/fps + offset, 3)
                        p[a] = f'{total}s'

                    begin = float(p['@begin'][:-1])
                    end = float(p['@end'][:-1])

                    if end < begin:
                        self.logger.error(
                            f'End time is earlier than start time ({end} < {begin})',
                        )
                        return

                xml['tt']['body']['div']['p'].extend(subs)

            xml_data = xmltodict.unparse(xml, pretty=True)
            xml_data = xml_data.replace('{{BR}}', '<br />')

            os.makedirs(self.args.output_dir, exist_ok=True)

            self.logger.info(f'Converting and saving to {output}')

            with open(output, 'wb') as fd:
                if output_format == 'ttml':
                    fd.write(xml_data.encode('utf-8-sig'))
                elif output_format == 'srt':
                    r = pycaption.DFXPReader().read(xml_data)
                    w = pycaption.SRTWriter().write(r)
                    fd.write(w.encode('utf-8-sig'))

        if not has_subtitles:
            self.logger.info('No subtitles available')
Example #15
 def test_normal_null_values(self, value, expected):
     assert sanitize_filename(value) == expected
Example #16
File: sanitize.py Project: oroschz/arkive
def sanitize_name(name: str) -> str:
    new_name = sanitize_filename(name)
    return "BLANK" if new_name is "" else new_name
def write_out_html(subject, folder_name, body):
    """Not pretty, needs improving, especially with the arguments"""
    subject = sanitize_filename(subject)
    filename = f"{subject[:50]}.html"
    file_path = enumerate_file_path(os.path.join(folder_name, filename))
    open(file_path, "w").write(body)
SAVE_LOCATION = "/Users/Andrew/Desktop/Oxford_emails/"
SAVE_FILES = True
FOLDER_LIMIT = None

with open("credentials.json", "r") as read_file:
    credentials = json.load(read_file)

username = credentials["username"]
password = credentials["password"]
server = credentials["server_name"]

with imap_tools.MailBox(server).login(username, password) as mailbox:
    for folder in mailbox.folder.list():
        mailbox_name = folder["name"]
        mailbox.folder.set(mailbox_name)
        mailbox_folder = make_folder_if_absent(SAVE_LOCATION, sanitize_filename(mailbox_name))
        for i, msg in enumerate(mailbox.fetch(reverse=False, mark_seen=False, limit=FOLDER_LIMIT)):

            if SAVE_FILES:
                # Make folder for email thread with same name as msg.subject
                sanitized_subject = sanitize_filename(msg.subject)
                if sanitized_subject == '':
                    sanitized_subject = enumerate_file_path('No subject')
                subject_folder = make_folder_if_absent(mailbox_folder, sanitized_subject)

                json_filename = enumerate_file_path(os.path.join(subject_folder, 'message.json'))
                encoded_message = email_to_json.json_encode(msg)
                write_to_file(json_filename, encoded_message, as_bytes=False)

                # To decode json representation of imap_tools.message.MailMessage object:
                # b = email_to_json.json_decode(a)
Example #19
def dojointplot(ds,spec,freq,beamazel,optical,optazel,optlla,isrlla,heightkm,utopt,P):
    """
    ds: radar data

    f1,a1: radar   figure,axes
    f2,a2: optical figure,axes
    """
    assert isinstance(ds,DataArray)

#%% setup master figure
    fg = figure(figsize=(8,12))
    gs = gridspec.GridSpec(2, 1, height_ratios=[3,1])
#%% setup radar plot(s)
    a1 = fg.add_subplot(gs[1])
    plotsumionline(ds,a1,expfn(P['isrfn']),P['zlim'])

    h1 = a1.axvline(nan,color='k',linestyle='--')
    t1 = a1.text(0.05,0.95,'time=',transform=a1.transAxes,va='top',ha='left')
#%% setup top optical plot
    if optical is not None:
        a0 = fg.add_subplot(gs[0])
        clim = compclim(optical,lower=10,upper=99.99)
        h0 = a0.imshow(optical[0,...],origin='lower',interpolation='none',cmap='gray',
                       norm=vidnorm,vmin=clim[0],vmax=clim[1])
        a0.set_axis_off()
        t0 = a0.set_title('')

#%% plot magnetic zenith beam
        azimg = optazel[:,1].reshape(optical.shape[1:])
        elimg = optazel[:,2].reshape(optical.shape[1:])

        optisrazel = projectisrhist(isrlla,beamazel,optlla,optazel,heightkm)

        br,bc = findindex2Dsphere(azimg,elimg,optisrazel['az'],optisrazel['el'])

        #hollow beam circle
    #    a2.scatter(bc,br,s=500,marker='o',facecolors='none',edgecolor='red', alpha=0.5)

        #beam data, filled circle
        s0 = a0.scatter(bc,br,s=2700,alpha=0.6,linewidths=3,
                        edgecolors=jet(linspace(ds.min().item(), ds.max().item())))

        a0.autoscale(True,tight=True)
        fg.tight_layout()
#%% time sync
    tisr = ds.time.values
    Iisr,Iopt = timesync(tisr,utopt,P['tlim'])
#%% iterate
    first = True
    Writer = anim.writers['ffmpeg']
    writer = Writer(fps=5,
                    metadata=dict(artist='Michael Hirsch'),
                    codec='ffv1')

    ofn = Path(P['odir']).expanduser() / ('joint_' +
            pathvalidate.sanitize_filename(str(datetime.fromtimestamp(utopt[0]))[:-3]) + '.mkv')

    print('writing {}'.format(ofn))
    with writer.saving(fg, str(ofn),150):
      for iisr,iopt in zip(Iisr,Iopt):
        ctisr = tisr[iisr]
#%% update isr plot
        h1.set_xdata(ctisr)
        t1.set_text('isr: {}'.format(ctisr))
#%% update hist plot
        if iopt is not None:
            ctopt = datetime.fromtimestamp(utopt[iopt], tz=UTC)
            h0.set_data(optical[iopt,...])
            t0.set_text('optical: {}'.format(ctopt))
            s0.set_array(ds.loc[ctisr]) #FIXME circle not changing magnetic zenith beam color? NOTE this is isr time index
#%% anim
        if first and iopt is not None:
            plotazelscale(optical[iopt,...],azimg,elimg)
            show()
            first=False
        #
        draw(); pause(0.01)

        writer.grab_frame(facecolor='k')

        if ofn.suffix == '.png':
            try:
                writeplots(fg,ctopt,ofn,P['makeplot'],ctxt='joint')
            except UnboundLocalError:
                writeplots(fg,ctisr,ofn,P['makeplot'],ctxt='isr')
Example #20
 def test_exception_type(self, value, expected):
     with pytest.raises(expected):
         sanitize_filename(value)
     assert not is_valid_filename(value)
Example #21
def create_filename_without_extension(item):
    return sanitize_filename(item["Title"])
Example #22
def getImage(directory, image):
  filename = sanitize_filename(image)
  filepath = path.join(os.environ['EFFIGY_PATH'], directory, filename)
  return send_file(filepath, attachment_filename=filename)
Example #23
    def download(
        self,
        quality: str,
        file_name: str = None,
        path: str = None,
        multi_threading: bool = False,
        max_threads: int = None,
        use_ffmpeg: bool = True,
        include_intro: bool = False,
        delete_chunks: bool = True,
        on_progress=None,
        print_progress: bool = True,
    ):
        """Downloads the current episode in your selected quality.

        :param quality: The quality that you want to download. The available qualities
                        are "ld" (360p), "sd" (480p), "hd" (720p) and "fullhd".
                        Note that all qualities may not be available for all episodes.
        :type quality: str
        :param file_name: The name of the downloaded file. If left as None, the file will be named
                        "[anime_name] - [episode_number].mp4". Macros are also supported:
                        "<anititle>" will be replaced by the anime name, <ep> will be replaced
                        by the episode number and <eptitle> will be replaced by the episode's title.
                        For example, let's say that the episode in question is the third episode of
                        the anime called "Vinland Saga". The title of the episode is "Troll". Suppose
                        we pass the string ``"<anititle> - <ep> - <eptitle>"``; the resulting file will be
                        named ``"Vinland Saga - 3 - Troll.mp4"``
        :type file_name: str, optional
        :param path: Path to where you want the downloaded video to be saved, defaults to None. If left as None,
                     the current working directory (i.e. the directory where the script calling the method
                     lives) is used as the path.
        :type path: str, optional
        :param multi_threading: Set this to True to enable multithreaded downloading, defaults to False.
                      Enabling this can offer significant performance benefits, especially on faster
                      connections. However, this comes with a trade-off: using multithreading negatively
                      affects download resumability. Therefore it is recommended that this be set to False
                      when using slower connections.
        :type multi_threading: bool, optional
        :param max_threads: Set the maximum number of threads that will be used at once when using
                      multithreaded downloading, defaults to None. When None, the maximum number of feasible
                      threads will be used, i.e. one thread per chunk.
        :type max_threads: int, optional
        :param use_ffmpeg: Enable/disable using FFmpeg to combine the downloaded chunks, defaults to True.
                      Requires FFmpeg. It is recommended to keep this enabled, as not using FFmpeg can cause
                      video playback issues on certain players. Using FFmpeg also results in noticeably smaller
                      files.
        :type use_ffmpeg: bool, optional
        :param include_intro: Set this to True to include the 5 second aniwatch intro, defaults to False.
                      It is recommended to skip the intro, as it causes issues when combining the chunks
                      with FFmpeg.
        :type include_intro: bool, optional
        :param delete_chunks: Set this to False to keep the downloaded .ts chunks after they have been
                              combined into a single mp4 file. Defaults to True.
        :type delete_chunks: bool, optional
        :param on_progress: Register a function that is called every time a new chunk is downloaded. The number
                      of chunks done and the total number of chunks are passed as arguments to the function, in that
                      exact order. Defaults to None.
        :type on_progress: function, optional
        :param print_progress: Print the number of chunks done and the total number of chunks to the console,
                      defaults to True.
        :type print_progress: bool, optional
        """
        m3u8 = self.get_m3u8(quality)

        if file_name is None:
            file_name = f"{self.anime_title[:128]}-{self.number}"
        else:
            file_name = (file_name.replace("<ep>", str(self.number)).replace(
                "<eptitle>", self.title).replace("<anititle>",
                                                 self.anime_title[:128]))
        file_name = sanitize_filename(file_name)
        current_path = os.getcwd()
        if path:
            os.chdir(path)

        if multi_threading:
            dlr = MultiThreadDownloader(self.__network, m3u8, file_name,
                                        self.ep_id, max_threads, use_ffmpeg,
                                        include_intro, delete_chunks,
                                        self.__generate_default_headers())
        else:
            dlr = Downloader(self.__network, m3u8, file_name, self.ep_id,
                             use_ffmpeg, include_intro, delete_chunks,
                             self.__generate_default_headers())

        dlr.download()
        dlr.merge()
        if delete_chunks:
            dlr.remove_chunks()
        os.chdir(current_path)
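For illustration, the `<anititle>`/`<ep>`/`<eptitle>` macro handling described in the docstring can be sketched in isolation (this helper is hypothetical, not part of the library):

from pathvalidate import sanitize_filename

def expand_episode_filename(template, anime_title, number, ep_title):
    # Same substitution order as the download() method above, then a sanitize pass.
    name = (template.replace("<ep>", str(number))
                    .replace("<eptitle>", ep_title)
                    .replace("<anititle>", anime_title[:128]))
    return sanitize_filename(name)

print(expand_episode_filename("<anititle> - <ep> - <eptitle>", "Vinland Saga", 3, "Troll"))
# -> "Vinland Saga - 3 - Troll"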
Example #24
    def parse(self, response):
        self.print_dbg("Entered Parse Function")

        try:

            response = response.replace(encoding='utf8',
                                        body=response.text.encode('utf8'))

            self.count = self.count + 1

            local_filename, remote_filename, image_folder = self.url_to_local_path(
                response.url)

            self.print_dbg("Downloading URL: {} to {}, {}".format(
                response.url, local_filename, image_folder))

            with open(local_filename, 'wb') as f:
                f.write(response.body)

                if self.s3:
                    s3file = self.s3path + remote_filename
                    self.print_dbg(
                        "Uploading to S3 bucket {}. s3 file= {} ".format(
                            self.s3bucket, s3file))
                    self.s3.upload_file(local_filename, self.s3bucket, s3file)

            # response.replace(encoding='UTF-8')

            self.print_dbg("Extracting Image Links")
            pageImages = LinkExtractor(tags=['img'],
                                       attrs=['src'],
                                       deny_extensions=[],
                                       canonicalize=False)

            self.print_dbg("Done Extracting Image Links")

            # self.print_dbg(COLORS.OKBLUE + "Images:", pageImages.extract_links(response), COLORS.ENDC)

            for image in pageImages.extract_links(response):

                # Will download the images - it caches images and does not download same url twice

                self.print_dbg("Looping over image: " + image.url)

                image_url = image.url
                image_url_hash = hashlib.sha224(
                    str(image_url).encode('utf-8')).hexdigest()
                filename = sanitize_filename(image_url[image_url.rfind("/") +
                                                       1:])
                image_path = os.path.join(image_folder, filename)
                cache_file = os.path.join(
                    os.path.join(self.cache_folder, "image_cache"),
                    image_url_hash)
                file_exists = os.path.exists(image_path)

                if os.path.exists(cache_file) and not file_exists:
                    # This file has been cached, no need to redownload, just copy
                    copyfile(cache_file, image_path)
                elif not file_exists:  # do not redownload if image exists on drive
                    try:
                        self.print_dbg("Downloading image:" + image_url)

                        # with eventlet.Timeout(10):
                        #    image_content = requests.get(image_url, timeout=(5, 10), verify=False).content
                        r = http.request('GET',
                                         image_url,
                                         preload_content=False,
                                         timeout=urllib3.Timeout(connect=5.0,
                                                                 read=10.0))

                        with open(image_path, 'wb') as out:
                            while True:
                                data = r.read(65536)
                                if not data:
                                    break
                                out.write(data)

                        r.release_conn()

                        # Write the image
                        #f = open(image_path, 'wb')
                        #f.write(image_content)
                        #f.close()

                        self.print_dbg(
                            "Image download complete. Written to: " +
                            image_path)

                        # Copy to cache
                        copyfile(image_path, cache_file)
                    except Exception as e:
                        self.print_dbg(COLORS.FAIL + "Exception: " + str(e) +
                                       COLORS.ENDC)

                if self.s3:
                    remote_imgname = image_path[len(self.folder):]
                    s3imgfile = self.s3path + remote_imgname
                    self.print_dbg(
                        "Uploading image to S3 bucket: {} ".format(s3imgfile))
                    self.s3.upload_file(image_path, self.s3bucket, s3imgfile)

            self.pages_parsed = self.pages_parsed + 1

            self.print_dbg("Extracting Links")
            link_extractor = LinkExtractor(allow=self.url_path + ".*")
            for link in link_extractor.extract_links(response):

                self.print_dbg(COLORS.OKGREEN + "Yielding: ->" + link.url +
                               COLORS.ENDC)
                yield scrapy.Request(link.url,
                                     callback=self.parse,
                                     errback=self.errback_httpbin)
                # self.print_dbg("Yielded")
                # response.follow(link, self.parse)
        except Exception as e:

            self.print_dbg(COLORS.FAIL + "Exception in parse: " + str(e) +
                           COLORS.ENDC)

            self.print_dbg((response.headers, response.body[:1024]))
            # If we have parsed a lot of pages, reduce the error count
            # from exceptions here
            if self.pages_parsed > 20:
                self.error_count = self.error_count - 1

            self.error_count = self.error_count + 1
Example #25
 def test_normal_space_or_period_at_tail(self, platform, value, expected):
     filename = sanitize_filename(value, platform=platform)
     assert filename == expected
     assert is_valid_filename(filename, platform=platform)
Example #26
def run_(window: sg.Window,
         username,
         progresses: List[Tuple[sg.Text, sg.ProgressBar]],
         stopped: Event,
         lists=DEFAULT_LISTS,
         dupli_mode=0,
         dir=ml.get_default_dir(),
         thread_count=5):

    # dupli_mode: 0-move, 1-copy, 2-download again

    def progress_callback():
        window['--PROGRESS_UPDATE--'].click()

    st.init(thread_count=thread_count)
    with ThreadPoolExecutor(max_workers=thread_count) as executor:
        try:
            for idx_type, list_type in enumerate(lists):
                if stopped.is_set():
                    break

                print(f'[I] Doing list of type "{list_type}"')
                fold = path_joins(ml.library_dir, username, list_type)

                # make anime list folder
                os.makedirs(fold, exist_ok=True)
                print('[I] Created folder of this anime list')

                page_num = 1
                print(f'[I] Doing page number {page_num}')
                was_con_err_list = True
                while was_con_err_list:
                    try:
                        anime_list = mm.get_anime_list_for_page(
                            username, list_type, page_num)
                        was_con_err_list = False
                    except ConnectionError as ce:
                        cprint('[E] Was connection error, retrying')

                anime_list = anime_list  # type: ignore
                while len(anime_list) > 0:
                    if stopped.is_set():
                        break

                    anime_in_page_done = 0
                    for idx_anime, anime in enumerate(anime_list):
                        if stopped.is_set():
                            break

                        if PAGE_LIMIT > 0 and anime_in_page_done >= PAGE_LIMIT:
                            print(f'[I] Debug limit ({PAGE_LIMIT}) reached')
                            break

                        mal_id = anime['mal_id']
                        title, ops, eds = mm.get_cached(mal_id)

                        print(f'[I] Got page for "{title} ({mal_id})')

                        to_search = [f'{title} op {i + 1}' for i in range(ops)] + \
                                    [f'{title} ed {i + 1}' for i in range(eds)]

                        already_downloaded_in_anime = []
                        for idx_video, request in enumerate(to_search):
                            if stopped.is_set():
                                break

                            print(f'[I] YT request: "{request}"')
                            search_res = VideosSearch(
                                request, limit=1).result()['result'][0]
                            response, video_title, video_id = search_res[
                                'link'], search_res['title'], search_res['id']

                            filename = f'{request} ({str(mal_id)}) - {video_id}.mp3'
                            filename = str(sanitize_filename(filename))
                            filepath = path_joins(fold, filename)

                            if dupli_mode != 3:
                                if filename in ml.files_already_downloaded:
                                    _username, _type = ml.files_already_downloaded[
                                        filename]
                                    if username != _username:
                                        cprint(
                                            f'[*] Video found downloaded in folder of different user ({path_joins(_username, _type)}), copying it',
                                            'yellow')
                                        ml.copy(username, list_type, filename)
                                        print('[I] File successfully copied')
                                        continue

                                    elif dupli_mode == 0:
                                        cprint(
                                            f'[*] Video found downloaded in different list ({path_joins(_username, _type)}), moving it',
                                            'yellow')
                                        ml.move(username, list_type, filename)
                                        print('[I] File successfully moved')
                                        continue

                                    elif dupli_mode == 1:
                                        cprint(
                                            f'[*] Video found downloaded in different list ({path_joins(_username, _type)}), copying it',
                                            'yellow')
                                        ml.copy(username, list_type, filename)
                                        print('[I] File successfully copied')
                                        continue

                            if response in already_downloaded_in_anime:
                                cprint(
                                    f'[*] Video "{response}" already downloaded, skipping',
                                    'yellow')
                                continue

                            print('[I] Waiting for free thread')
                            while not st.can_be_added() and not stopped.is_set(
                            ):
                                sleep(0.1)
                                # print(st.threads)
                            print('[I] Found free')

                            if stopped.is_set():
                                break

                            free_index = st.find_free()

                            song_thread = st.SongDownloadThread(
                                response, filepath, request)
                            future = executor.submit(song_thread.run,
                                                     progress_callback,
                                                     free_index)
                            st.add_thread(song_thread, future)
                            sleep(0.001)
                            # cprint(f'[D] threads: {st.threads}')

                        anime_in_page_done += 1

                    page_num += 1
                    print(f'[I] Doing page number {page_num}')
                    was_con_err_list = True
                    while was_con_err_list:
                        try:
                            anime_list = mm.get_anime_list_for_page(
                                username, list_type, page_num)
                            was_con_err_list = False
                        except ConnectionError as ce:
                            cprint('[E] Was connection error, retrying')

        except Exception as e:
            cprint('[E] ' + str(e), 'red')
            stopped.set()

        if stopped.is_set():
            for song_thread in st.threads:
                if song_thread is not None:
                    song_thread[1].cancel()
Example #27
File: main.py Project: sadrltd/alphaclone
async def on_member_join(member):
    if not "Welcome" in [
            server['enabled_modules'] for server in bot.config['servers']
            if server['id'] == member.server.id
    ][0]:
        return
    template = Image.open('extras/template.png')
    draw = ImageDraw.Draw(template)
    user = member.name
    server = member.server.name
    img_fraction = 0.50
    fontsize = 16
    member_name = pathvalidate.sanitize_filename(member.name).replace(
        '(', '').replace(')', '').replace(' ', '')
    server_name = pathvalidate.sanitize_filename(member.server.name).replace(
        '(', '').replace(')', '').replace(' ', '')
    if member.avatar_url == "":
        member_avatar = requests.get(member.default_avatar_url)
    else:
        member_avatar = requests.get(member.avatar_url)
    with open('extras/{}.png'.format(member_name), 'wb') as f:
        for chunk in member_avatar.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    member_avatar = Image.open('extras/{}.png'.format(member_name))
    member_avatar = member_avatar.resize((182, 182), Image.ANTIALIAS)
    template.paste(member_avatar, (34, 71))
    guild_icon = requests.get(member.server.icon_url)
    with open('extras/{}.png'.format(server_name), 'wb') as f:
        for chunk in guild_icon.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    guild_icon = Image.open('extras/{}.png'.format(server_name))
    guild_icon = guild_icon.resize((64, 64), Image.ANTIALIAS)
    template.paste(guild_icon, (660, 300))
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    while font.getsize(user)[0] < img_fraction * template.size[0]:
        fontsize += 1
        font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    fontsize -= 1
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    if len(user) < 6:
        font = ImageFont.truetype("extras/segoeui.ttf", 58)
    draw.text((125, 290), user, (0, 0, 0), font=font)

    fontsize = 16
    img_fraction = 0.25
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    while font.getsize(server)[0] < img_fraction * template.size[0]:
        fontsize += 1
        font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    fontsize -= 1
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    if len(server) < 6:
        font = ImageFont.truetype("extras/segoeui.ttf", 32)
    draw.text((540, 255), server, (0, 0, 0), font=font)

    template.save('extras/finished.png')
    await bot.send_file(
        member.server.get_channel([
            server['welcome_channel'] for server in bot.config['servers']
            if server['id'] == member.server.id
        ][0]), 'extras/finished.png')
    os.remove('extras/finished.png')
    os.remove('extras/{}.png'.format(member_name))
    os.remove('extras/{}.png'.format(server_name))
Example #28
# Determine results count
results_count = search_data['total']
results_pages = math.ceil(results_count / 50)

# Exit script if no results
if (results_count == 0):
    print('No results found for your query, sorry')
    exit()

# Inform user of results count
print('Found ' + str(results_count) + ' images, producing ' +
      str(results_pages) + ' pages of results')

# Create results folders
spaced_name = search_string.replace(":", " ")
folder_name = pathvalidate.sanitize_filename(spaced_name)
downloads_folder = './downloads/' + folder_name + '/'
downloads_json_folder = downloads_folder + 'json/'
try:
    os.makedirs(downloads_folder)
    os.makedirs(downloads_json_folder)
except FileExistsError:
    print("Results directory already exists.")

# Loop through each page
counter = 1
for page in range(0, results_pages):
    # Generate a search URL for this page of results
    page_search_url = config[
        'site'] + '/api/v1/json/search/images?key=' + config[
            'key'] + '&perpage=50&page=' + str(
Example #29
def upload_subjects(
    subject_set_id,
    manifest_files,
    allow_missing,
    remote_location,
    mime_type,
    file_column,
):
    """
    Uploads subjects from each of the given MANIFEST_FILES.

    Example with only local files:

    $ panoptes subject-set upload-subjects 4667 manifest.csv

    Local filenames will be automatically detected in the manifest and
    uploaded, or filename columns can be specified with --file-column.

    If you are hosting your media yourself, you can put the URLs in the
    manifest and specify the column number(s):

    $ panoptes subject-set upload-subjects -r 1 4667 manifest.csv

    $ panoptes subject-set upload-subjects -r 1 -r 2 4667 manifest.csv

    Any local files will still be detected and uploaded.
    """
    if (len(manifest_files) > 1
            and any(map(lambda m: m.endswith('.yaml'), manifest_files))):
        click.echo(
            'Error: YAML manifests must be processed one at a time.',
            err=True,
        )
        return -1
    elif manifest_files[0].endswith('.yaml'):
        with open(manifest_files[0], 'r') as yaml_manifest:
            upload_state = yaml.load(yaml_manifest, Loader=yaml.FullLoader)
        if upload_state['state_version'] > CURRENT_STATE_VERSION:
            click.echo(
                'Error: {} was generated by a newer version of the Panoptes '
                'CLI and is not compatible with this version.'.format(
                    manifest_files[0], ),
                err=True,
            )
            return -1
        if upload_state['subject_set_id'] != subject_set_id:
            click.echo(
                'Warning: You specified subject set {} but this YAML '
                'manifest is for subject set {}.'.format(
                    subject_set_id,
                    upload_state['subject_set_id'],
                ),
                err=True,
            )
            click.confirm('Upload {} to subject set {} ({})?'.format(
                manifest_files[0],
                subject_set_id,
                SubjectSet.find(subject_set_id).display_name,
            ),
                          abort=True)
            upload_state['subject_set_id'] = subject_set_id
        resumed_upload = True
    else:
        upload_state = {
            'state_version': CURRENT_STATE_VERSION,
            'subject_set_id': subject_set_id,
            'manifest_files': manifest_files,
            'allow_missing': allow_missing,
            'remote_location': remote_location,
            'mime_type': mime_type,
            'file_column': file_column,
            'waiting_to_upload': [],
            'waiting_to_link': {},
        }
        resumed_upload = False

    remote_location_count = len(upload_state['remote_location'])
    mime_type_count = len(upload_state['mime_type'])
    if remote_location_count > 1 and mime_type_count == 1:
        upload_state['mime_type'] = (upload_state['mime_type'] *
                                     remote_location_count)
    elif remote_location_count > 0 and mime_type_count != remote_location_count:
        click.echo(
            'Error: The number of MIME types given must be either 1 or equal '
            'to the number of remote locations.',
            err=True,
        )
        return -1

    def validate_file(file_path):
        if not os.path.isfile(file_path):
            click.echo(
                'Error: File "{}" could not be found.'.format(file_path, ),
                err=True,
            )
            return False

        file_size = os.path.getsize(file_path)
        if file_size == 0:
            click.echo(
                'Error: File "{}" is empty.'.format(file_path, ),
                err=True,
            )
            return False
        elif file_size > MAX_UPLOAD_FILE_SIZE:
            click.echo(
                'Error: File "{}" is {}, larger than the maximum {}.'.format(
                    file_path,
                    humanize.naturalsize(file_size),
                    humanize.naturalsize(MAX_UPLOAD_FILE_SIZE),
                ),
                err=True,
            )
            return False
        return True

    def get_index_fields(headers):
        index_fields = [
            header.lstrip('%') for header in headers if header.startswith('%')
        ]
        return ",".join(str(field) for field in index_fields)

    subject_set = SubjectSet.find(upload_state['subject_set_id'])
    if not resumed_upload:
        subject_rows = []
        for manifest_file in upload_state['manifest_files']:
            with open(manifest_file, 'U') as manifest_f:
                file_root = os.path.dirname(manifest_file)
                r = csv.reader(manifest_f, skipinitialspace=True)
                headers = next(r)
                # update set metadata for indexed sets
                index_fields = get_index_fields(headers)
                if index_fields:
                    subject_set.metadata['indexFields'] = index_fields
                    subject_set.save()
                # remove leading % from subject metadata headings
                cleaned_headers = [header.lstrip('%') for header in headers]
                for row in r:
                    metadata = dict(zip(cleaned_headers, row))
                    files = []
                    if not upload_state['file_column']:
                        upload_state['file_column'] = []
                        for field_number, col in enumerate(row, start=1):
                            file_path = os.path.join(file_root, col)
                            if os.path.exists(file_path):
                                upload_state['file_column'].append(
                                    field_number, )
                                if not validate_file(file_path):
                                    return -1
                                files.append(file_path)
                    else:
                        for field_number in upload_state['file_column']:
                            file_path = os.path.join(file_root,
                                                     row[field_number - 1])
                            if not validate_file(file_path):
                                return -1
                            files.append(file_path)

                    for field_number, _mime_type in zip(
                            upload_state['remote_location'],
                            upload_state['mime_type'],
                    ):
                        files.append({_mime_type: row[field_number - 1]})

                    if len(files) == 0:
                        click.echo(
                            'Could not find any files in row:',
                            err=True,
                        )
                        click.echo(','.join(row), err=True)
                        if not upload_state['allow_missing']:
                            return -1
                        else:
                            continue
                    subject_rows.append((files, metadata))

                if not subject_rows:
                    click.echo(
                        'File {} did not contain any rows.'.format(
                            manifest_file, ),
                        err=True,
                    )
                    return -1

        subject_rows = list(enumerate(subject_rows))
        upload_state['waiting_to_upload'] = copy.deepcopy(subject_rows)
    else:
        # iterate over a copy so entries can be deleted from the dict while looping
        for subject_id, subject_row in list(upload_state['waiting_to_link'].items()):
            try:
                subject = Subject.find(subject_id)
            except PanoptesAPIException:
                upload_state['waiting_to_upload'].append(subject_row)
                del upload_state['waiting_to_link'][subject_id]
        subject_rows = copy.deepcopy(upload_state['waiting_to_upload'])

    pending_subjects = []

    def move_created(limit):
        while len(pending_subjects) > limit:
            for subject, subject_row in pending_subjects:
                if subject.async_save_result:
                    pending_subjects.remove((subject, subject_row))
                    upload_state['waiting_to_upload'].remove(subject_row)
                    upload_state['waiting_to_link'][subject.id] = subject_row
            time.sleep(0.5)

    def link_subjects(limit):
        if len(upload_state['waiting_to_link']) > limit:
            subject_set.add(list(upload_state['waiting_to_link'].keys()))
            upload_state['waiting_to_link'].clear()

    with click.progressbar(
            subject_rows,
            length=len(subject_rows),
            label='Uploading subjects',
    ) as _subject_rows:
        try:
            with Subject.async_saves():
                for subject_row in _subject_rows:
                    count, (files, metadata) = subject_row
                    subject = Subject()
                    subject.links.project = subject_set.links.project
                    for media_file in files:
                        subject.add_location(media_file)
                    subject.metadata.update(metadata)
                    subject.save()

                    pending_subjects.append((subject, subject_row))

                    move_created(MAX_PENDING_SUBJECTS)
                    link_subjects(LINK_BATCH_SIZE)

            move_created(0)
            link_subjects(0)
        finally:
            if (len(pending_subjects) > 0
                    or len(upload_state['waiting_to_link']) > 0):
                click.echo('Error: Upload failed.', err=True)
                if click.confirm(
                        'Would you like to save the upload state to resume the '
                        'upload later?',
                        default=True,
                ):
                    while True:
                        state_file_name = 'panoptes-upload-{}.yaml'.format(
                            subject_set_id, )
                        state_file_name = click.prompt(
                            'Enter filename to save to',
                            default=state_file_name,
                        )

                        if not state_file_name.endswith('.yaml'):
                            click.echo(
                                'Error: File name must end in ".yaml".',
                                err=True,
                            )
                            if click.confirm(
                                    'Save to {}.yaml?'.format(state_file_name),
                                    default=True,
                            ):
                                state_file_name += '.yaml'
                            else:
                                continue
                        if not is_valid_filename(state_file_name):
                            click.echo(
                                'Error: {} is not a valid file name'.format(
                                    state_file_name, ),
                                err=True,
                            )
                            sanitized_filename = sanitize_filename(
                                state_file_name, )
                            if click.confirm(
                                    'Save to {}?'.format(sanitized_filename, ),
                                    default=True,
                            ):
                                state_file_name = sanitized_filename
                            else:
                                continue
                        if os.path.exists(state_file_name):
                            if not click.confirm(
                                    'File {} already exists. Overwrite?'.
                                    format(state_file_name, ),
                                    default=False,
                            ):
                                continue
                        break

                    with open(state_file_name, 'w') as state_file:
                        yaml.dump(upload_state, state_file)
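
The prompt loop above pairs is_valid_filename with sanitize_filename so that a bad file name never aborts the state save. A minimal standalone sketch of the same pattern (the default name and the plain input() prompt are stand-ins for the click-based flow above):

import os

from pathvalidate import is_valid_filename, sanitize_filename


def choose_state_file(default_name='panoptes-upload.yaml'):
    while True:
        name = input('Enter filename to save to [{}]: '.format(default_name))
        name = name.strip() or default_name
        if not name.endswith('.yaml'):
            name += '.yaml'
        if not is_valid_filename(name):
            # Fall back to a sanitized variant instead of rejecting the input.
            name = sanitize_filename(name)
        if os.path.exists(name):
            print('File {} already exists, pick another name.'.format(name))
            continue
        return name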
예제 #30
0
    def get_pkg_cache_filepath(self, package_name, filename):
        return self.__get_pkg_cache_dir(package_name).joinpath(
            sanitize_filename(filename))
예제 #31
0
def get_name_from_content_disposition(fname, ContentDisposition):
    result = re.findall('filename=\"(.+)\"', ContentDisposition)
    if len(result) > 0:
        result_name, result_ext = os.path.splitext(result[0])
        return sanitize_filename(fname + result_ext)
    return None
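
A quick usage sketch (the header value is invented; the imports used by the snippet above, re, os.path and pathvalidate.sanitize_filename, are assumed) shows that the helper keeps the caller's base name and only adopts the extension reported by the server:

header = 'attachment; filename="quarterly report?.pdf"'
print(get_name_from_content_disposition("2024-Q1-report", header))
# -> '2024-Q1-report.pdf'; os.path.splitext keeps only '.pdf'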
예제 #32
0
    def get_misc_cache_filepath(self, classifier_name, filename):
        return self.__get_misc_cache_dir(classifier_name).joinpath(
            sanitize_filename(filename))
예제 #33
0
def download_and_tag(
    root_dir,
    tmp_count,
    track_url_dict,
    track_metadata,
    album_or_track_metadata,
    is_track,
    is_mp3,
    embed_art=False,
    multiple=None,
):
    """
    Download and tag a file

    :param str root_dir: Root directory where the track will be stored
    :param int tmp_count: Temporary download file number
    :param dict track_url_dict: get_track_url dictionary from Qobuz client
    :param dict track_metadata: Track item dictionary from Qobuz client
    :param dict album_or_track_metadata: Album/track dictionary from Qobuz client
    :param bool is_track: Whether the item is a single track rather than a full album
    :param bool is_mp3: Whether the download is MP3 (otherwise FLAC)
    :param bool embed_art: Embed cover art into file (FLAC-only)
    :param multiple: Multiple disc integer
    :type multiple: integer or None
    """
    extension = ".mp3" if is_mp3 else ".flac"

    try:
        url = track_url_dict["url"]
    except KeyError:
        print("Track not available for download")
        return

    if multiple:
        root_dir = os.path.join(root_dir, "Disc " + str(multiple))
        os.makedirs(root_dir, exist_ok=True)

    filename = os.path.join(root_dir, ".{:02}".format(tmp_count) + extension)

    new_track_title = sanitize_filename(track_metadata["title"])
    track_file = "{:02}. {}{}".format(track_metadata["track_number"],
                                      new_track_title, extension)
    final_file = os.path.join(root_dir, track_file)
    if os.path.isfile(final_file):
        print(track_metadata["title"] + " was already downloaded. Skipping...")
        return

    desc = get_description(track_url_dict, track_metadata, multiple)
    tqdm_download(url, filename, desc)
    tag_function = metadata.tag_mp3 if is_mp3 else metadata.tag_flac
    try:
        tag_function(
            filename,
            root_dir,
            final_file,
            track_metadata,
            album_or_track_metadata,
            is_track,
            embed_art,
        )
    except Exception as e:
        print("Error tagging the file: " + str(e))
        os.remove(filename)
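
The final name combines the track number with a sanitized title, so slashes or colons in a title cannot break the destination path. A rough illustration with made-up metadata (exact output depends on the pathvalidate defaults):

from pathvalidate import sanitize_filename

track_metadata = {"track_number": 3, "title": "Intro/Outro: Part 1"}
track_file = "{:02}. {}{}".format(track_metadata["track_number"],
                                  sanitize_filename(track_metadata["title"]),
                                  ".flac")
print(track_file)  # typically '03. IntroOutro Part 1.flac'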
예제 #34
0
    def __get_pkg_cache_dir(self, package_name):
        cache_dir = self.__base_dir.joinpath(
            sanitize_filename(package_name).lower())
        cache_dir.makedirs_p()

        return cache_dir
예제 #35
0
    def __processOnlineDocuments(self, isCountRun=False):

        if not self.onlineDocumentsDict:
            return

        menuWidth = 200

        def __printStatus(idx, document, status=""):
            # fill idx to 5 chars
            idx = str(idx)
            idx = idx.zfill(5)
            if not isCountRun:
                self.__printLeftRight(
                    idx + " - " + document["dateCreation"] + " - " +
                    document["name"] + " - " + document["mimeType"],
                    status,
                    ".",
                    menuWidth,
                )

        overwrite = False  # Only download new files
        useSubFolders = self.settings.getBoolValueForKey("useSubFolders")
        outputDir = self.settings.getValueForKey("outputDir")
        isDownloadOnlyFilename = self.settings.getBoolValueForKey(
            "downloadOnlyFilenames")
        downloadFilenameList = self.settings.getValueForKey(
            "downloadOnlyFilenamesArray")
        downloadOnlyFromOnlineArchive = self.settings.getBoolValueForKey(
            "downloadOnlyFromOnlineArchive")

        countAll = len(self.onlineDocumentsDict)
        countProcessed = 0
        countSkipped = 0
        countDownloaded = 0

        # for idx in range(len(self.onlineDocumentsDict)): # documentMeta in enumerate(self.onlineDocumentsDict):
        for idx in self.onlineDocumentsDict:
            documentMeta = self.onlineDocumentsDict[idx]
            docName = documentMeta["name"]
            firstFilename = docName.split(" ", 1)[0]
            docMimeType = documentMeta["mimeType"]
            docCreateDate = documentMeta["dateCreation"]
            isDocAdvertisement = (
                str(documentMeta["advertisement"]).lower() == "true")
            isDocArchived = (
                str(documentMeta["documentMetaData"]["archived"]).lower()
                == "true")
            isAlreadyRead = (
                str(documentMeta["documentMetaData"]["alreadyRead"]).lower()
                == "true")

            subFolder = ""
            myOutputDir = outputDir
            countProcessed += 1

            # counting
            if isDocAdvertisement:
                self.onlineAdvertismentIndicesList.append(idx)

            if isDocArchived:
                self.onlineArchivedIndicesList.append(idx)

            if firstFilename in downloadFilenameList:
                self.onlineFileNameMatchingIndicesList.append(idx)

            if not isAlreadyRead:
                self.onlineUnreadIndicesList.append(idx)

            # check for setting "only download documents from the online archive"
            if downloadOnlyFromOnlineArchive and not isDocArchived:
                __printStatus(idx, documentMeta, "SKIPPED - not in archive")
                countSkipped += 1
                continue

            # check for setting "only download if filename is in filename list"
            if isDownloadOnlyFilename and firstFilename not in downloadFilenameList:
                __printStatus(idx, documentMeta,
                              "SKIPPED - filename not in filename list")
                countSkipped += 1
                continue

            if docMimeType == "application/pdf":
                subFolder = firstFilename
                docName += ".pdf"
            elif docMimeType == "text/html":
                docName += ".html"
                subFolder = "html"

            if useSubFolders:
                myOutputDir = os.path.join(outputDir, subFolder)
                if not os.path.exists(myOutputDir):
                    os.makedirs(myOutputDir)

            filepath = os.path.join(myOutputDir, sanitize_filename(docName))

            # check if already downloaded
            if os.path.exists(filepath):
                self.onlineAlreadyDownloadedIndicesList.append(idx)
                if not overwrite:
                    __printStatus(idx, documentMeta, "SKIPPED - no overwrite")
                    countSkipped += 1
                    continue
            else:
                self.onlineNotYetDownloadedIndicesList.append(idx)

            # do the download
            if not bool(self.settings.getBoolValueForKey(
                    "dryRun")) and not isCountRun:
                docContent = self.conn.downloadMessage(documentMeta)
                moddate = time.mktime(
                    datetime.datetime.strptime(docCreateDate,
                                               "%Y-%m-%d").timetuple())
                with open(filepath, "wb") as f:
                    f.write(docContent)
                    # shutil.copyfileobj(docContent, f)
                os.utime(filepath, (moddate, moddate))
                __printStatus(idx, documentMeta, "DOWNLOADED")
                countDownloaded += 1
            else:
                __printStatus(
                    idx, documentMeta,
                    "DOWNLOADED - dry run, so not really downloaded")
                countDownloaded += 1

        # last line, summary status:
        if not isCountRun:
            menuWidth = 74
            self.__printFullWidth("--", "center", "-", menuWidth)
            self.__printFullWidth("Status Files Downloading", "left", "-",
                                  menuWidth)
            print("All: " + str(countAll) + " files")
            print("Processed: " + str(countProcessed) + " files")
            print("Downloaded: " + str(countDownloaded) + " files")
            print("Skipped: " + str(countSkipped) + " files")
                path = f"{output}/" \
                    f"{sanitize_filename(course.name)}/" \
                    f"{sanitize_filename(module.name)}/"
                if not os.path.exists(path):
                    os.makedirs(path)

                item_type = item.type
                print(f"{course.name} - "
                      f"{module.name} - "
                      f"{item.title} ({item_type})")

                if item_type == "File":
                    file = canvas.get_file(item.content_id)
                    files_downloaded.add(item.content_id)
                    file.download(path + sanitize_filename(file.filename))
                elif item_type == "Page":
                    page = course.get_page(item.page_url)
                    with open(path + sanitize_filename(item.title) + ".html", "w", encoding="utf-8") as f:
                        f.write(page.body or "")
                    files = extract_files(page.body or "")
                    for file_id in files:
                        if file_id in files_downloaded:
                            continue
                        try:
                            file = course.get_file(file_id)
                            files_downloaded.add(file_id)
                            file.download(path + sanitize_filename(file.filename))
                        except ResourceDoesNotExist:
                            pass
                elif item_type == "ExternalUrl":
예제 #37
0
def create_filename(item):
    extension = item["Url"].split(".")[-1]
    extension = extension.split("?")[0]
    return sanitize_filename(item["Title"]) + "." + extension
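
As a usage sketch (the item dict is invented), the second split is what drops a query string that would otherwise leak into the extension:

item = {
    "Title": "Episode 12: The Cloud?",
    "Url": "https://cdn.example.com/audio/ep12.mp3?token=abc123",
}
print(create_filename(item))
# -> typically 'Episode 12 The Cloud.mp3'; ':' and '?' are stripped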
예제 #38
0
    def test_dict_convert(self):
        import hashlib
        from time import sleep
        import json
        import os
        from pathvalidate import sanitize_filename
        real_filename = 'dictionary/test_dict_convert.sqlite'
        filename = sanitize_filename(real_filename)
        user_id = self.signup_common()
        self.login_common()
        root_ids = self.create_language('Корень')
        first_hash = hashlib.md5(open(real_filename, 'rb').read()).hexdigest()
        response = self.app.post('/blob', params = {'data_type':'dialeqt_dictionary'},
                                 upload_files=([('blob', real_filename)]))
        self.assertEqual(response.status_int, HTTPOk.code)
        blob_ids = response.json
        response = self.app.get('/blobs/%s/%s' % (blob_ids['client_id'],
                                                          blob_ids['object_id']))
        self.assertEqual(response.status_int, HTTPOk.code)
        file_response = self.app.get(response.json['content'])
        second_hash = hashlib.md5(file_response.body).hexdigest()
        self.assertEqual(first_hash, second_hash)
        response = self.app.post_json('/convert_check', params={'blob_client_id': blob_ids['client_id'],
                                                         'blob_object_id': blob_ids['object_id']})
        self.assertEqual(response.status_int, HTTPOk.code)
        self.assertEqual(response.json, [])
        # a = input()
        response = self.app.post_json('/convert', params={'blob_client_id': blob_ids['client_id'],
                                                         'blob_object_id': blob_ids['object_id'],
                                                          'parent_client_id':root_ids['client_id'],
                                                          'parent_object_id':root_ids['object_id']})
        self.assertEqual(response.status_int, HTTPOk.code)
        self.assertDictEqual(response.json, {"status": "Your dictionary is being converted."
                                                       " Wait 5-15 minutes and you will see new dictionary in your dashboard."})
        not_found = True
        for i in range(3):
            response = self.app.post_json('/dictionaries', params={'user_created': [user_id]})
            if response.json['dictionaries']:
                not_found = False
                break
            sleep(10)
        if not_found:
            self.assertEqual('error', 'dictionary was not found')
        dict_ids = response.json['dictionaries'][0]
        for i in range(20):
            response = self.app.get('/dictionary/%s/%s/state' % (dict_ids['client_id'], dict_ids['object_id']))
            if response.json['status'].lower() == 'Converting 100%'.lower():
                break
            sleep(60)
        response = self.app.get('/dictionary/%s/%s/perspectives' % (dict_ids['client_id'], dict_ids['object_id']))
        self.assertEqual(response.status_int, HTTPOk.code)
        persp_ids = response.json['perspectives'][0]
        response = self.app.get('/dictionary/%s/%s/perspective/%s/%s/all'
                                % (dict_ids['client_id'],
                                   dict_ids['object_id'],
                                   persp_ids['client_id'],
                                   persp_ids['object_id']))
        # Uncomment to (re)create the expected json fixture
        # json_file = open('dictionary/test_dict_convert.json', 'w')
        # json_file.write(json.dumps(response.json))
        # json_file.close()

        json_file = open('dictionary/test_dict_convert.json', 'r')
        correct_answer = json.loads(json_file.read())
        self.assertDictEqual(response.json, correct_answer, set_like= True)
예제 #39
0
def parse_title_author(soup):
    header = soup.select_one("#content")
    title_tag = header.h1
    author, title = title_tag.text.split(' \xa0 :: \xa0 ')
    return sanitize_filename(author), sanitize_filename(title)
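
For reference, a usage sketch with simplified markup (the real page is larger; BeautifulSoup is assumed to be the parser that produced soup):

from bs4 import BeautifulSoup

html = '<div id="content"><h1>Jane Doe \xa0 :: \xa0 My Book: Part 1</h1></div>'
print(parse_title_author(BeautifulSoup(html, "html.parser")))
# -> ('Jane Doe', 'My Book Part 1'); the ':' is removed by sanitize_filename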
예제 #40
0
    def test_normal_max_len(self, value, max_len, expected):
        filename = sanitize_filename(value, max_len=max_len)
        assert len(filename) == expected
        assert is_valid_filename(filename, max_len=max_len)
예제 #41
0
    def test_normal_str(self, platform, value, replace_text, expected):
        sanitized_name = sanitize_filename(value, platform=platform, replacement_text=replace_text)
        assert sanitized_name == expected
        assert isinstance(sanitized_name, str)
        validate_filename(sanitized_name, platform=platform)
        assert is_valid_filename(sanitized_name, platform=platform)
예제 #42
0
    def __createFoldersFromList(self, folders, baseFolder=''):
        baseFolder = sanitize_filename(baseFolder)

        for folder in folders:
            folderName = os.path.join(self.path, baseFolder, folder)
            os.makedirs(folderName, exist_ok=True)
예제 #43
0
async def synthetic_ajax(request,
                         url,
                         parameters=None,
                         key=None,
                         sort_key=None,
                         default=None):
    '''
    Stub similar to google_ajax, but grabbing data from local files.

    This is helpful for testing, but it's even more helpful since
    Google is an amazingly unreliable B2B company, and this lets us
    develop without relying on them.

    At some point, we'll want to upgrade this to support small-scale
    deployments, with a directory tree such as e.g.:

    `course_rosters/[course_id].json`

    and

    `course_lists/[teacher_id].json`
    '''
    if settings.settings['roster-data']['source'] == 'test':
        synthetic_data = {
            COURSE_URL: paths.data("courses.json"),
            ROSTER_URL: paths.data("students.json")
        }
    elif settings.settings['roster-data']['source'] == 'filesystem':
        print(request['user'])
        safe_userid = pathvalidate.sanitize_filename(
            request['user']['user_id'])
        courselist_file = "courselist-" + safe_userid
        if parameters is not None and 'courseid' in parameters:
            safe_courseid = pathvalidate.sanitize_filename(
                str(parameters['courseid']))
            roster_file = "courseroster-" + safe_courseid
        else:
            roster_file = "default"
        synthetic_data = {
            ROSTER_URL:
            paths.data("course_rosters/{roster_file}.json".format(
                roster_file=roster_file)),
            COURSE_URL:
            paths.data("course_lists/{courselist_file}.json".format(
                courselist_file=courselist_file))
        }
    else:
        print("PANIC!!! ROSTER!")
        print(settings.settings['roster-data']['source'])
        sys.exit(-1)
    try:
        with open(synthetic_data[url]) as data_file:
            data = json.load(data_file)
    except FileNotFoundError as e:
        print(e)
        raise aiohttp.web.HTTPInternalServerError(
            text="Server configuration error. "
            "No course roster file for your account. "
            "Please ask the sysadmin to make one. "
            "(And yes, they'll want to know about this issue;"
            "you won't be bugging them)")
    return data
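
Sanitizing the user id and course id before they are interpolated into a path is what keeps a hostile identifier from escaping the data directory. A minimal illustration with invented values:

import pathvalidate

for raw in ("teacher-42", "../../etc/passwd", 'course?id="7"'):
    print(repr(pathvalidate.sanitize_filename(raw)))
# 'teacher-42' is left untouched; the path separators and quote characters
# in the other two values are stripped, so neither can point outside the
# course_rosters/ or course_lists/ directories.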
예제 #44
0
    def test_normal_reserved_name(self, value, test_platform, expected):
        filename = sanitize_filename(value, platform=test_platform)
        assert filename == expected
        assert is_valid_filename(filename, platform=test_platform)
예제 #45
0
    parser.add_argument("--end_page",  default=2, type=int,
                        help="номер страницы, ДО которой закончить скачивание")
    parser.add_argument("--skip_imgs",  action='store_true',
                        help="не скачивать картинки")
    parser.add_argument("--skip_txt",  action='store_true',
                        help="не скачивать книги")
    parser.add_argument("--dest_folder", default=os.path.abspath(os.curdir),
                        help="путь к каталогу с результатами парсинга: картинкам, книгами, JSON")
    parser.add_argument("--json_path", default='books.json',
                        help="указать свой путь к *.json файлу с результатами")
    args = parser.parse_args()

    img_folder = os.path.join(args.dest_folder, img_subfolder)
    txt_folder = os.path.join(args.dest_folder, txt_subfolder)
    if args.json_path:
        json_file = sanitize_filename(args.json_path)

    pathlib.Path(txt_folder).mkdir(parents=True, exist_ok=True)
    pathlib.Path(img_folder).mkdir(parents=True, exist_ok=True)

    books = []
    book_urls = get_book_url_from_pages(base_url, category, args.start_page, args.end_page)
    logger.info(f'Prepared {len(book_urls)} book links')

    for page_url in book_urls:
        local_timeout = timeout
        while local_timeout < 100:
            try:
                page = get_page(page_url)
                if not page:
                    logger.info(f'{page_url}: book page not found on the site')
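
The excerpt stops inside the retry loop; a plausible standalone version of the same back-off idea (not necessarily what the original does, and the ConnectionError handling is an assumption) could look like this:

import time

import requests


def fetch_with_backoff(url, get_page, timeout=5, ceiling=100):
    # Sketch of the visible pattern: retry get_page with a growing delay
    # until the ceiling is reached. The original loop body is not shown above.
    local_timeout = timeout
    while local_timeout < ceiling:
        try:
            return get_page(url)
        except requests.exceptions.ConnectionError:
            time.sleep(local_timeout)
            local_timeout *= 2
    return None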
예제 #46
0
    def test_normal_str(self, platform, value, replace_text, expected):
        sanitized_name = sanitize_filename(value, platform=platform, replacement_text=replace_text)
        assert sanitized_name == expected
        assert isinstance(sanitized_name, six.text_type)
        validate_filename(sanitized_name, platform=platform)
        assert is_valid_filename(sanitized_name, platform=platform)