Example 1
def clustered_encode():
    global terminate
    sh = shell()
    # Here we do the clustering magic

    # This is here rather than at the top as is usual so that flac2all can work
    # even if ZeroMQ is not installed. Perhaps in future zmq will become a hard
    # dependency, and then we can move it.
    import zmq

    zcontext = zmq.Context()

    # Task socket (to send tasks out)
    tsock = zcontext.socket(zmq.PUSH)
    tsock.bind("tcp://*:2019")

    # receive socket (gets results from workers)
    rsock = zcontext.socket(zmq.PULL)
    rsock.bind("tcp://*:2020")

    # connect loopback to receive socket
    csock = zcontext.socket(zmq.PUSH)
    csock.connect("tcp://localhost:2020")

    # Gathering file data
    files = sh.getfiles(opts['dirpath'])
    inlist = []

    for infile in files:
        for mode in opts['mode'].split(','):
            if mode.startswith("_"):
                # This should never happen unless some smart-alec tries to call
                # a private mode publicly, but just in case
                continue
            if not infile.endswith(".flac"):
                if opts['copy'] is True:
                    opts.update({"copymode": mode})
                    line = [infile, "_copy", opts]
                    inlist.append(line)
            else:
                line = [infile, mode, opts]
                inlist.append(line)
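
    # Each inlist entry is now a task of the form [infile, mode, opts]; for
    # example (illustrative paths): ["/music/album/01.flac", "vorbis", opts],
    # or ["/music/album/cover.jpg", "_copy", opts] for a non-flac file when
    # --copy is enabled.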

    incount = len(inlist)
    log.info("We have %d tasks" % incount)
    start_time = time.time()
    workers = {}
    log.info("Waiting for at least one worker to join")
    results = []
    while True:
        log.active_workers(len(workers))
        log.tasks(
            incount,
            len([x for x in results if int(x[4]) == 0]),
            len([x for x in results if int(x[4]) != 0]),
        )
        # If a worker was last seen more than 4 minutes ago, we assume it
        # is no longer available, and clear it out
        for key in dict(workers):
            if ((time.time() - workers[key]) > 240):
                del workers[key]
                log.warn("Worker %s not responding, clearing from list (%d remaining)" % (key, len(workers)))
                if len(workers) == 0:
                    if inlist == []:
                        # We have no more to process, just exit
                        terminate = True
                    else:
                        log.crit("No more workers. Need at least one worker to join")

        if terminate is True:
            # If we want to terminate, clear the entire inlist
            # This will clean up the same as when we end normally
            inlist = []

        try:
            line = rsock.recv_json(flags=zmq.NOBLOCK)
        except zmq.error.Again as e:
            # "Resource temporarily unavailable" varies by OS
            #   errno 11 on Linux
            #   errno 35 on FreeBSD
            # We expect this if no data, so we sit in a loop and wait
            if ((e.errno == 11) or (e.errno == 35)):
                if terminate is True:
                    log.warn("Terminated")
                    break  # We exit the loop, the zmq bits are cleaned up post loop
                time.sleep(0.01)  # wait a little bit and try again
                # Because we wait for very short here, we increase the update count
                # to prevent refreshing too fast
                log.updatecount = 200
                continue
            else:
                # Now we return the refresh count to normal
                log.updatecount = 20
                log.crit("Error #: %d" % e.errno)
                raise e  # re-raise other errnos

        if line[0].startswith('ONLINE'):
            worker_id = line[0].split('~')[-1]
            # A worker has joined. Add the ID and timestamp of last seen
            workers.update({worker_id: time.time()})
            log.ok("Got worker %s ( %d workers)" % (worker_id, len(workers)))
        elif line[0].startswith('EOLACK'):
            worker_id = line[0].split('~')[-1]
            if worker_id in workers:
                del workers[worker_id]
            log.warn("Worker %s terminated (%d running)" % (worker_id, len(workers)))
            if len(workers) == 0:
                break
        elif line[0].startswith('OFFLINE'):
            worker_id = line[0].split('~')[-1]
            if worker_id in workers:
                del workers[worker_id]

            log.crit("Worker %s gone OFF LINE (%d running)" % (worker_id, len(workers)))
            if len(workers) == 0:
                break

        elif line[0].startswith("READY"):
            # A worker is ready for a new task

            worker_id = line[0].split('~')[-1]
            # First we update the "last seen" value in worker list
            if worker_id in workers:
                workers[worker_id] = time.time()
            else:
                # If we get a ready request from a worker not in our list, we add it
                # to the list and assign it a job anyway. We assume we can trust the workers
                log.warn("Got ready signal from unknown worker. Adding to worker list")
                workers.update({worker_id: time.time()})
                log.ok("Added unknown worker %s (%d workers)" % (worker_id, len(workers)))

            # And now we push a new task to worker
            if len(inlist) == 0:
                # We have reached the end. Send EOL
                tsock.send_json(["EOL", None, None])
                continue
            else:
                # Pop a job off the list and send to worker as task
                tsock.send_json(inlist.pop())
        elif line[0].startswith("NACK"):
            worker_id = line[0].split('~')[-1]
            log.warn("Task '%s' refused by worker %s, rescheduling" % (line[2], worker_id))
            # For whatever reason the worker is refusing the task, so
            # put it back onto the inlist for another worker to do
            inlist.append(line[1:])
        elif len(line) == 6:
            name = line[0].split('/')[-1]
            name = name.replace(".flac", "")
            if len(name) > 55:
                name = name[:55] + "..."
            line = [str(x).strip() for x in line]
            if "ERROR" in line[3]:
                log.crit("n:%-60s\tt:%-10s\ts:%-10s" % (name.encode("utf-8", "backslashreplace").decode(), line[2], line[3]))
            else:
                log.status("n:%-60s\tt:%-10s\ts:%-10s" % (name.encode("utf-8", "backslashreplace").decode(), line[2], line[3]))
            results.append(line)
        else:
            log.crit("UNKNOWN RESULT!")
            log.crit(line)

    end_time = time.time()
    rsock.close()
    csock.close()
    tsock.close()

    # Now, we confirm that the number of files sent equals the number processed
    log.info("input: %d, output: %d" % (incount, len(results)))
    if (incount != len(results)):
        log.crit("Error. Not all tasks were completed.")
        sys.exit(1)
    # log.print(list(set([x[0] for x in inlist]) - set([x[0] for x in results])))
    results = generate_summary(start_time, end_time, incount, results)
    write_logfile(opts['outdir'], results)
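
For context, here is a minimal sketch of the worker side of the protocol that clustered_encode() expects: pull tasks from port 2019, push results and status messages to port 2020, and announce state with ONLINE/READY/EOLACK messages. The worker id scheme, the encode_task() helper, and the exact result field order (taken from the lQ format documented in Example 3) are assumptions for illustration, not the actual flac2all worker implementation.

import time
import uuid

import zmq


def worker_sketch(master="localhost"):
    ctx = zmq.Context()

    # Pull tasks from the master's task socket
    tasks = ctx.socket(zmq.PULL)
    tasks.connect("tcp://%s:2019" % master)

    # Push results and status messages back to the master's receive socket
    results = ctx.socket(zmq.PUSH)
    results.connect("tcp://%s:2020" % master)

    worker_id = str(uuid.uuid4())
    results.send_json(["ONLINE~%s" % worker_id])  # announce ourselves

    while True:
        # Tell the master we are free; it answers with a task (or EOL) on the task socket
        results.send_json(["READY~%s" % worker_id])
        infile, mode, opts = tasks.recv_json()

        if infile == "EOL":
            # Nothing left to do: acknowledge the EOL and shut down
            results.send_json(["EOLACK~%s" % worker_id])
            break

        start = time.time()
        outfile, status, rcode = encode_task(infile, mode, opts)

        # Six-element result line; the master reads element 3 as the status
        # string and element 4 as the return code
        results.send_json([infile, outfile, mode, status, rcode, time.time() - start])

    tasks.close()
    results.close()
    ctx.term()


def encode_task(infile, mode, opts):
    # Hypothetical stand-in for the real conversion call; returns
    # (outfile, status, return_code)
    return (infile.replace(".flac", "." + mode), "SUCCESS", 0)
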
Example 2
def main():
    global log
    options, args = build_parser()

    # update the opts dictionary with new values
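    # (optparse's Values object prints as a dict literal, so eval() on its
    # string form yields a plain dict we can merge into opts)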
    opts.update(eval(options.__str__()))

    # convert the colon-delimited options in the args into valid arguments
    # for oggenc and opusenc
    opts['oggencopts'] = ' --' + ' --'.join(opts['oggencopts'].split(':'))
    opts['opusencopts'] = ' --' + ' --'.join(opts['opusencopts'].split(':'))

    # lame is not consistent: it sometimes uses long options and sometimes not,
    # so the user has to indicate on the command line with a leading dash
    # whether an option is long or short
    opts['lameopts'] = ' -' + ' -'.join(opts['lameopts'].split(':'))

    # ffmpeg uses colons as delimiters, just like flac2all (of course), so we had to
    # switch to commas for this one
    opts['ffmpegopts'] = opts['ffmpegopts'].split(',')
    opts['ffmpegopts'] = list(flatten([x.split(' ') for x in opts['ffmpegopts']]))
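    # For example (illustrative values): an oggencopts string of
    # "quality=5:downmix" becomes " --quality=5 --downmix", and an ffmpegopts
    # string of "-b:a 128k,-ar 48000" becomes ['-b:a', '128k', '-ar', '48000'].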

    try:
        opts['mode'] = args[0]

    except IndexError:  # if no arguments specified
        log.print("No mode specified! Run with '-h' for help")
        sys.exit(1)  # quit the program with non-zero status

    try:
        opts['dirpath'] = os.path.abspath(args[1])

    except IndexError:
        log.print("No directory specified! Run with '-h' for help")
        sys.exit(2)  # quit the program with non-zero status

    # end command line checking

    # Commence main logic
    if options.curses is True:
        log = cconsole()  # switch to cconsole, if specified as option

    if not os.path.exists(opts['outdir']):
        log.info("Creating output directory")
        os.mkdir(opts['outdir'])

    # Check if we have the special mode "all", which really brings flac2all into
    # perspective. We convert to every single format supported. This is mainly added for
    # testing reasons.
    if opts['mode'] == "all":
        opts['mode'] = ','.join([x[0] for x in modetable if not x[0].startswith("_")])

    # In this version, we can convert to multiple formats at once, e.g.
    # mode = mp3,vorbis will create both in parallel
    for mode in opts['mode'].split(','):
        if mode != "":
            # When copying, we don't want a _copy dir, but one representing
            # the mode being copied to, so we check and update mode here
            if "copymode" in opts:
                mode = opts['copymode']
            try:
                os.mkdir(os.path.join(opts['outdir'], mode))
            except OSError as e:
                if e.errno == 17:
                    log.info("Folder %s already exists, reusing..." % mode)
                elif e.errno == 2:
                    log.info("Parent path %s does not exist! quitting..." % (
                        opts['outdir']
                    ))
                    sys.exit(2)
                else:
                    # everything else, raise error
                    raise e

    # Magic goes here :)
    if opts['master_enable']:
        clustered_encode()
    else:
        threaded_encode()

    if options.curses is True:
        log.__del__()  # If we are using the curses interface, clean up properly at the end.
Example 3
def main():
    sh = shell()

    # process Queue, the queue that will hold all the flac files we want to convert.
    #  format: [ $infile, $target_format ]
    pQ = mp.Queue()

    # copy Queue (for copying non flac files if requested)
    #  format: [ $infile, $outfile ]
    cQ = mp.Queue()

    # logging Queue, the encoders log progress to this
    # format: [
    #   $infile,
    #   $outfile,
    #   $format,
    #   $error_status,
    #   $return_code,
    #   $execution_time
    #   ]
    lQ = mp.Queue()

    # I've decided that the encoder options should just be long options.
    # quite frankly, we are running out of letters that make sense.
    # plus it makes a distinction between encoder opts, and program opts
    # (which will continue to use single letters)
    parser = OptionParser(usage=prog_usage())
    parser.add_option("-c",
                      "--copy",
                      action="store_true",
                      dest="copy",
                      default=False,
                      help="Copy non flac files across (default=False)")

    parser.add_option("",
                      "--opus-options",
                      dest="opusencopts",
                      default="",
                      help="Colon delimited options to pass to opusenc.\
        Any oggenc long option (one with two '--' in front) can be specified\
        in the above format.")

    parser.add_option("",
                      "--vorbis-options",
                      dest="oggencopts",
                      default="quality=2",
                      help="Colon delimited options to pass to oggenc,for\
        example: 'quality=5:resample 32000:downmix:bitrate_average=96'.\
        Any oggenc long option (one with two '--' in front) can be specified\
        in the above format.")

    parser.add_option("",
                      "--lame-options",
                      dest="lameopts",
                      default="-preset standard:q 0",
                      help="Options to pass to lame,\
for example:           '-preset extreme:q 0:h:-abr'. Any lame\
option can be specified here, if you want a short option (e.g. -h),\
then just do 'h'. If you want a long option (e.g. '--abr'), then you need\
a dash: '-abr'")

    parser.add_option("",
                      "--aacplus-options",
                      dest="neroaacplusopts",
                      default="q 0.3",
                      help="Nero AACplus options, valid options is one of:\
    Quality (q $float), bitrate (br $int), or streaming bitrate (cbr $int) ")

    parser.add_option("-o",
                      "--outdir",
                      dest="outdir",
                      metavar="DIR",
                      help="Set custom output directory (default='./')",
                      default="./")

    parser.add_option(
        "-f",
        "--force",
        dest="overwrite",
        action="store_true",
        help="Force overwrite of existing files (by default we skip)",
        default=False)

    parser.add_option(
        "-t",
        "--threads",
        dest="threads",
        default=mp.cpu_count(),
        help="How many threads to run in parallel (default: autodetect\
    [found %d cpu(s)] )" % mp.cpu_count())

    parser.add_option("-n",
                      "--nodirs",
                      dest="nodirs",
                      action="store_true",
                      default=False,
                      help="Don't create Directories, put everything together")

    (options, args) = parser.parse_args()

    # update the opts dictionary with new values
    opts.update(eval(options.__str__()))

    # convert the colon-delimited options in the args into valid arguments
    # for oggenc and opusenc
    opts['oggencopts'] = ' --' + ' --'.join(opts['oggencopts'].split(':'))
    opts['opusencopts'] = ' --' + ' --'.join(opts['opusencopts'].split(':'))

    # The Nero codec is annoying, as it takes the bitrate in actual bits/s,
    # rather than kbit/s like every other codec on earth. So we need to parse
    # the value out and convert it

    enctype, rate = opts['neroaacplusopts'].split(' ')
    if enctype == "br" or enctype == "cbr":
        opts['neroaacplusopts'] = ' -%s %d' % (enctype, int(rate) * 1000)
    else:
        opts['neroaacplusopts'] = ' -%s %s' % (enctype, rate)
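    # For example: "br 320" becomes " -br 320000" (kbit/s converted to bit/s),
    # while the default "q 0.3" passes through unchanged as " -q 0.3".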

    # lame is not consistent: it sometimes uses long options and sometimes not,
    # so the user has to indicate on the command line with a leading dash
    # whether an option is long or short
    opts['lameopts'] = ' -' + ' -'.join(opts['lameopts'].split(':'))

    try:
        opts['mode'] = args[0]

    except IndexError:  # if no arguments specified
        print "No mode specified! Run with '-h' for help"
        sys.exit(1)  # quit the program with non-zero status

    try:
        opts['dirpath'] = os.path.abspath(args[1])
        print "DEBUG: %s" % opts['dirpath']

    except IndexError:
        print "No directory specified! Run with '-h' for help"
        sys.exit(2)  # quit the program with non-zero status

    # end command line checking

    if not os.path.exists(opts['outdir']):
        print "Creating output directory"
        os.mkdir(opts['outdir'])

    # In this version, we can convert to multiple formats at once, e.g.
    # mode = mp3,vorbis will create both in parallel
    for mode in opts['mode'].split(','):
        if mode != "":
            try:
                os.mkdir(os.path.join(opts['outdir'], mode))
            except OSError as e:
                if e.errno == 17:
                    print "Folder %s already exists, reusing..." % mode
                elif e.errno == 2:
                    print "Parent path %s does not exist! quitting..." % (
                        opts['outdir'])
                    sys.exit(2)
                else:
                    # everything else, raise error
                    raise e

    # Magic goes here :)

    # 1. populate the queue with flac files
    files = sh.getfiles(opts['dirpath'])
    count = 0
    for infile in files:

        for mode in opts['mode'].split(','):
            if infile.endswith(".flac"):
                pQ.put([infile, opts['dirpath'], opts['outdir'], mode])
                count += 1
            else:
                if opts['copy']:
                    cQ.put([infile, opts['dirpath'], opts['outdir'], mode])

    time.sleep(1)  # Delay to resolve queue "broken pipe" errors

    print "We have %d flac files to convert" % count
    print "We have %d non-flac files to copy across" % cQ.qsize()

    # Right, how this works: we pass the whole queue to the encode threads
    # (one per processor) and have them pop tasks off as necessary. This
    # allows for far more fine-grained control.

    opts['threads'] = int(opts['threads'])

    # keep flags for state (pQ,cQ)
    sflags = [0, 0]
    ap = []  # active processes
    while True:

        cc = opts['threads']

        while int(cc) > (len(ap)):
            print(">> Spawning execution process")
            proc = encode_thread(int(cc), "Thread %d" % int(cc), pQ, opts, lQ)
            proc.start()
            ap.append(proc)

        time.sleep(0.5)

        # Believe it or not, the only way to be sure a queue is actually
        # empty is to try a get with a timeout. So we get and put back,
        # and if we hit a timeout error (10 secs), register it

        try:
            pQ.put(pQ.get(timeout=10))
        except mp.TimeoutError as e:
            print "Process queue finished."
            sflags[0] = 1
        except Queue.Empty as e:
            print "Process queue finished."
            sflags[0] = 1
        else:
            sflags[0] = 0

        # sflags[1] = 1
        # Commented out until we get the shell_process_thread function written
        #
        try:
            command = cQ.get(timeout=10)
            srcfile, srcroot, dest, encformat = command
            outdir = sh.generateoutdir(srcfile, os.path.join(dest, encformat),
                                       srcroot)
            copytarget(srcfile, outdir)
            print("%s => %s" % (srcfile, outdir))
        except mp.TimeoutError as e:
            sflags[1] = 1
        except Queue.Empty as e:
            sflags[1] = 1
        else:
            sflags[1] = 0

        if sflags == [1, 1]:
            print "Processing Complete!"
            break

        # Sometimes processes die (due to errors, or exit called), which
        # will slowly starve the script as they are not restarted. The below
        # filters out dead processes, allowing us to respawn as necessary
        ap = filter(lambda x: x.is_alive(), ap)

    # Now wait for all running processes to complete
    print "Waiting for all running process to complete."
    print ap

    # We don't use join() because if a child hangs, it takes the entire
    # program down with it
    st = time.time()
    while True:

        if len(filter(lambda x: x.is_alive(), ap)) == 0: break
        print "-" * 80
        for proc in filter(lambda x: x.is_alive(), ap):
            print "Process \"%s\" is still running! Waiting..." % proc.name
            print "-" * 80
        time.sleep(4)
        print ""
        if (time.time() - st) > 600:
            print "Process timeout reached, terminating stragglers and continuing anyway"

            map(lambda x: x.terminate(), filter(lambda x: x.is_alive(), ap))
            break

    # Now we fetch the log results, for the summary
    print "Processing run log..."
    log = []
    while not lQ.empty():
        log.append(lQ.get(timeout=2))

    total = len(log)
    successes = len(filter(lambda x: x[4] == 0, log))
    failures = total - successes
    print "\n\n"
    print "=" * 80
    print "| Summary "
    print "-" * 80
    print """
Total files on input: %d
Total files actually processed: %d
--
Execution rate: %.2f %%


Files we managed to convert successfully: %d
Files we failed to convert due to errors: %d
--
Conversion error rate: %.2f %%
""" % (count, total, ((float(total) / count) * 100), successes, failures,
       ((failures / float(total)) * 100))

    for mode in opts['mode'].split(','):
        # 1. find all the logs corresponding to a particular mode
        x = filter(lambda x: x[2] == mode, log)
        # 2. Get the execution time for all relevant logs
        execT = map(lambda y: y[5], x)
        if len(execT) != 0:
            esum = sum(execT)
            emean = sum(execT) / len(execT)
        else:
            # Empty set, just continue
            print("For mode %s:\nNo data (no files converted)\n" % mode)
            continue

        execT.sort()
        if len(execT) % 2 != 0:
            # Odd number, so median is middle
            emedian = execT[(len(execT) - 1) / 2]
        else:
            # Even set. So median is average of two middle numbers
            num1 = execT[(len(execT) / 2) - 1]
            num2 = execT[(len(execT) / 2)]
            emedian = (sum([num1, num2]) / 2.0)
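            # e.g. with execT = [1.0, 2.0, 4.0, 10.0] the two middle values
            # are 2.0 and 4.0, so emedian = 3.0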

        etime = "Total execution time: "
        if esum < 600:
            etime += "%.4f seconds" % esum
        elif esum < 3600:
            etime += "%.4f minutes" % (esum / 60)
        else:
            etime += "%.4f hours" % (esum / 60 / 60)

        print """
For mode: %s
%s
Per file conversion:
\tMean execution time: %.4f seconds
\tMedian execution time: %.4f seconds
""" % (mode, etime, emean, emedian)

    errout_file = opts['outdir'] + "/conversion_results.log"
    print "Writing log file (%s)" % errout_file
    fd = open(errout_file, "w")
    fd.write(
        "infile,outfile,format,conversion_status,return_code,execution_time\n")
    for item in log:
        item = map(lambda x: str(x), item)
        line = ','.join(item)
        fd.write("%s\n" % line)
    fd.close()
    print "Done!"

    if failures != 0:
        print "We had some failures in encoding :-("
        print "Check %s file for info." % errout_file
        print "Done! Returning non-zero exit status! "
        sys.exit(-1)
    else:
        sys.exit(0)
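
For reference, a typical invocation of this older version would look something like the following (the script name and paths are illustrative, not taken from the source):

    python2 flac2all.py vorbis,mp3 /path/to/flac/library -c -o ./converted -t 4

That is, the conversion mode(s) are the first positional argument and the source directory the second, with behaviour adjusted through the options registered above.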