def _save_durations_csv(input_path, durs_path, data_kind):
    # one (patient, trial) row for patients 1-133, trials 'a' and 'b'
    it_multi = pd.MultiIndex.from_product([list(range(1, 134)), ['a', 'b']],
                                          names=['patient', 'trial'])
    df_durs = pd.DataFrame(index=it_multi, columns=['duration_s', 'sfreq'])
    for file in files_builder(data_kind):
        file_path = os.path.join(input_path, file.name)
        index = get_index(file_path)
        trial = get_trial(file_path)
        df_durs.loc[(index, trial), 'duration_s'] = get_duration(file_path, file.df)
        df_durs.loc[(index, trial), 'sfreq'] = float(get_sampling_frequency(file_path))
    # note: despite the "_csv" in the name, the frame is persisted as a pickle
    df_durs.to_pickle(durs_path)
    return df_durs
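# The helpers above (files_builder, get_index, get_trial, get_duration,
# get_sampling_frequency) live elsewhere in this project. A minimal sketch of
# what get_duration(file_path, df) could look like for sampled recordings,
# assuming one DataFrame row per sample (an assumption, not the original
# implementation; it reuses the module's get_sampling_frequency helper):
def get_duration_sketch(file_path, df):
    """Duration in seconds = number of samples / sampling frequency."""
    sfreq = float(get_sampling_frequency(file_path))
    return len(df) / sfreq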
def check_results(self):
    if self.result in [Result.PASSED, Result.NOT_RUN] and self.scenario is None:
        res, msg = self.check_encoded_file()
        self.set_result(res, msg)
    else:
        if self.result == utils.Result.TIMEOUT:
            # A timeout with a fully rendered file usually means the
            # pipeline never emitted EOS.
            missing_eos = False
            try:
                if utils.get_duration(self.dest_file) == self.project.get_duration():
                    missing_eos = True
            except Exception:
                pass
            if missing_eos is True:
                self.set_result(utils.Result.TIMEOUT,
                                "The rendered file has the right duration, MISSING EOS?\n",
                                "failure")
            else:
                GstValidateTest.check_results(self)
def check_results(self):
    if self.result is Result.PASSED and self.scenario is None:
        res, msg = utils.compare_rendered_with_original(self.duration, self.dest_file)
        self.set_result(res, msg)
    else:
        if self.result == utils.Result.TIMEOUT:
            # A timeout with a fully rendered file usually means the
            # pipeline never emitted EOS.
            missing_eos = False
            error = None
            try:
                if utils.get_duration(self.dest_file) == self.duration:
                    missing_eos = True
            except Exception as e:
                # keep a reference: `e` is unbound outside the except block
                # in Python 3 (and undefined when nothing was raised)
                error = e
            if missing_eos is True:
                self.set_result(utils.Result.TIMEOUT,
                                "The rendered file has the right duration, MISSING EOS?\n",
                                "failure", error)
            else:
                GstValidateTest.check_results(self)
def day_conferences_fct(self, obj):
    day_conference_lst = []
    day_conferences = DayConference.objects.filter(conference=obj)
    for day_conference in day_conferences:
        day_conference_dict = {
            'id': day_conference.id,
            'day': cal_utils.get_day(day_conference.day),
            'hour_start': cal_utils.get_hour(day_conference.hour_start),
            'hour_end': cal_utils.get_hour(day_conference.hour_end),
            'duration': cal_utils.get_duration(day_conference.hour_start,
                                               day_conference.hour_end),
            'is_full': day_conference.is_full,
        }
        day_conference_lst.append(day_conference_dict)
    return day_conference_lst
def create_clips(video_path, output_folder, interval_seconds, clip_length):
    if not os.path.exists(video_path):
        raise ClipError('The specified video file does not exist.')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    duration = int(float(get_duration(video_path)))
    if interval_seconds > duration:
        raise ClipError(f'The interval ({interval_seconds}s) may not be longer than the video ({duration}s).')
    number_steps = math.trunc(duration / interval_seconds)
    output_clip_names = 'clips.txt'
    output_file_path = f'{output_folder}/{output_clip_names}'
    line()
    print(f'Creating a {clip_length} second clip every {interval_seconds} seconds from {video_path}...')
    line()
    # The concat list starts at step 1, so no clip is taken at offset 0.
    with open(output_file_path, 'w') as clip_file:
        for step in range(1, number_steps):
            clip_name = f'clip{step}.mkv'
            clip_file.write(f"file '{clip_name}'\n")
            output_filename = os.path.join(output_folder, clip_name)
            clip_offset = step_to_movie_timestamp(step * interval_seconds)
            print(f'Creating clip {step} which starts at {clip_offset}...')
            subprocess_cut_args = [
                'ffmpeg', '-loglevel', 'warning', '-stats', '-y',
                '-ss', str(clip_offset), '-i', video_path, '-map', '0',
                '-t', str(clip_length), '-c:v', 'libx264', '-crf', '0',
                '-preset', 'ultrafast', '-an', '-sn', output_filename,
            ]
            subprocess.run(subprocess_cut_args)
    return output_file_path
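# get_duration(video_path) is defined elsewhere in this project. Since ffmpeg
# is already a dependency here, one common implementation asks ffprobe for the
# container duration; this is a sketch of that approach, not necessarily the
# original helper:
import subprocess

def get_duration_sketch(video_path):
    """Return the video duration in seconds (as a string) via ffprobe."""
    result = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
         '-of', 'default=noprint_wrappers=1:nokey=1', video_path],
        capture_output=True, text=True, check=True)
    return result.stdout.strip()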
def main(corpus_dir, labels_dir, output_dir, sample_rate=16000,
         use_reference=False):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datadir = os.path.join(corpus_dir, 'core')
    transdir = os.path.join(labels_dir, 'transcriptions')
    speaker_map_filename = os.path.join(corpus_dir, 'doc', 'speakers')
    speaker_map = read_speaker_map(speaker_map_filename)

    wav_base = 'FILEID sox WAVPATH -r {0} -t .wav - |'.format(sample_rate)

    # skip utterances of types D (articulatory), E (non-speech), and F (other)
    skip_tasks = ('D', 'E', 'F')

    for subset in speaker_map:
        print('Processing {0} data'.format(subset))
        subset_outdir = os.path.join(output_dir, subset)
        if not os.path.exists(subset_outdir):
            os.makedirs(subset_outdir)

        speaker_utts = {}
        text, wav = [], []
        utt2spk, spk2utt = [], []
        utt2dur = []
        for speaker in speaker_map[subset]:
            speaker_dir = os.path.join(datadir, speaker)
            flist = [f for f in os.listdir(speaker_dir) if f.endswith('.wav')]
            for f in flist:
                f = f.replace('.wav', '')
                if f.endswith(skip_tasks):
                    continue

                # read transcription and convert to SLT/CHILD tokens
                fileid = '-'.join([speaker, f])
                txt_f = os.path.join(transdir, fileid + '.txt')
                with open(txt_f, 'r') as fid:
                    txt = fid.readline().rstrip()
                words = []
                for w in txt.split():
                    w = w.upper()
                    w = 'SLT' if 'SLT' in w else 'CHILD'
                    words.append(w)
                words = ' '.join([fileid] + words)
                text.append(words)

                # prepare wav.scp
                wavpath = os.path.join(speaker_dir, f + '.wav')
                file_wav = wav_base.replace('FILEID', fileid)
                file_wav = file_wav.replace('WAVPATH', wavpath)
                wav.append(file_wav)

                # prepare utt2dur
                dur = get_duration(wavpath)
                utt2dur.append('{0} {1}'.format(fileid, dur))

                # prepare utt2spk
                utt2spk.append('{0} {1}'.format(fileid, speaker))
                if speaker in speaker_utts:
                    speaker_utts[speaker].append(fileid)
                else:
                    speaker_utts[speaker] = [fileid]

        # prepare spk2utt
        for speaker in speaker_utts:
            spk_utts = '{0} {1}'.format(
                speaker, ' '.join(sorted(speaker_utts[speaker])))
            spk2utt.append(spk_utts)

        text_f = os.path.join(subset_outdir, 'text')
        wav_f = os.path.join(subset_outdir, 'wav.scp')
        utt2spk_f = os.path.join(subset_outdir, 'utt2spk')
        spk2utt_f = os.path.join(subset_outdir, 'spk2utt')
        utt2dur_f = os.path.join(subset_outdir, 'utt2dur')
        write_data(text, text_f)
        write_data(wav, wav_f)
        write_data(utt2spk, utt2spk_f)
        write_data(spk2utt, spk2utt_f)
        write_data(utt2dur, utt2dur_f)

        # validate data directory
        validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(
            subset_outdir)
        os.system(validate_cmd)
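# get_duration(wavpath) above produces the utterance length for Kaldi's
# utt2dur file. For plain PCM WAV input it can be computed with the standard
# library alone; a minimal sketch assuming uncompressed WAV (not the original
# helper):
import wave

def get_duration_sketch(wavpath):
    """Duration in seconds = frame count / frame rate."""
    with wave.open(wavpath, 'rb') as wav_file:
        return wav_file.getnframes() / float(wav_file.getframerate())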
def upload():
    log_request_info(request)
    # video is a werkzeug.datastructures.FileStorage object
    video = request.files['video-blob']
    app.logger.warning(video)
    app.logger.warning(type(video))
    app.logger.warning("filename: {0}".format(video.filename))
    audio = request.files['audio-blob']
    app.logger.warning(audio)

    try:
        # Get the uploaded files
        video = request.files['video-blob']
        audio = request.files['audio-blob']
    except Exception as e:
        app.logger.warning("error: {0}".format(e))
        raise Exception(e)

    # let's time it
    start = time.time()

    video_filename = ''
    audio_filename = ''
    if video:  # and allowed_file(video.filename):
        # Make the filename safe, remove unsupported chars
        filename = secure_filename(video.filename) + '_video' + '.webm'
        video_filename = filename
        # Move the file from the temporary folder to the upload folder we set up
        video.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    if audio:
        audio_filename = secure_filename(audio.filename) + '_audio' + '.wav'  # .mp3?
        print("AUDIO_1!!!: {0}".format(audio_filename))
        audio.save(os.path.join(app.config['UPLOAD_FOLDER'], audio_filename))
        app.logger.warning("filename: {0}".format(audio_filename))
        # downmix to mono before decoding
        stereo_to_mono(os.path.join(app.config['UPLOAD_FOLDER'], audio_filename))
        audio_filename = new_filename(audio_filename, "_mono")
        audio_filename = os.path.join(app.config['UPLOAD_FOLDER'], audio_filename)

    outfilename = '/home/ec2-user/flask_attempts/data/test.txt'
    stats = dict()

    # decode the speech in the file
    # ling_stats = decode_speech_driver(filename, outfilename)
    ling_stats = decode_speech(audio_filename)
    end = time.time()
    total_time = round(end - start)
    stats['time_to_analyze'] = total_time
    print("AUDIO_2!!!: {0}".format(audio_filename))
    stats['total speech time'] = get_duration(audio_filename)

    # combine the different stats to display in the template
    # (dict(a.items() + b.items()) from the original only works in Python 2)
    stats.update(ling_stats)
    app.logger.warning('stats: {0}'.format(stats))

    # render the speech as text on a different page
    return render_template('decoded_speech.html', stats=stats,
                           video_filename=video_filename,
                           audio_filename=audio_filename)
def generate_planets(theta, stars=stlr, mes_threshold=10):
    """
    theta = (lnf0, alpha, beta, fB, gamma)
    """
    lnf0, alpha, beta, fB, gamma = theta

    planets = pd.DataFrame({'kepid': [], 'koi_prad': [], 'koi_period': [],
                            'koi_prad_true': [], 'koi_max_mult_ev': []})
    n_skipped = 0
    for _, star in stars.iterrows():
        if np.isnan(star.radius) or np.isnan(star.mass):
            n_skipped += 1
            continue
        n_planets = poisson(np.exp(lnf0)).rvs()
        if n_planets == 0:
            continue
        try:
            star2, flux_ratio = get_companion(theta, star)
        except ValueError:
            n_skipped += 1
            continue
            # logging.warning('Skipping {}; cannot simulate binary.'.format(star.kepid))

        for i in range(n_planets):
            # First, figure out true & observed properties of planet
            radius, period = draw_planet(theta)
            observed_radius, host_star = diluted_radius(radius, star, star2,
                                                        flux_ratio)
            logging.debug('True: {:.2f}, Observed: {:.2f} ({})'.format(
                radius, observed_radius, flux_ratio))

            # Then, is it detected?  First, geometric:
            aor = get_a(period, host_star.mass)
            if np.isnan(aor):
                raise RuntimeError('aor is nan: P={} M={}'.format(
                    period, host_star.mass))
            # print(host_star.mass, aor)
            transit_prob = get_pgeom(aor / host_star.radius, 0.)  # no ecc.
            if np.random.random() > transit_prob:
                continue

            # Then depth and MES:
            depth = get_delta(observed_radius * R_EARTH / star.radius)
            tau = get_duration(period, aor, 0.) * 24  # no ecc.
            try:
                mes = get_mes(star, period, depth, tau)
            except ValueError:
                n_skipped += 1
                # skip this planet: without this `continue`, `mes` would be
                # unbound (or stale from a previous planet) in the test below
                continue
                # raise RuntimeError('MES is nan! {}, {}, {}'.format(depth, tau))
            if mes < mes_threshold:
                continue

            # Add planet to catalog
            planets = planets.append({'kepid': star.kepid,
                                      'koi_prad': observed_radius,
                                      'koi_period': period,
                                      'koi_prad_true': radius,
                                      'koi_max_mult_ev': mes},
                                     ignore_index=True)
    print('{} planets generated ({} of {} stars skipped.)'.format(
        len(planets), n_skipped, len(stars)))
    return planets
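# get_duration(period, aor, ecc) above returns a transit duration in days
# (hence the `* 24` to get hours for get_mes). In occurrence-rate codes of
# this kind it is typically the Burke et al. (2015) estimate; a sketch under
# that assumption (not confirmed to be this project's helper):
import numpy as np

def get_duration_sketch(period, aor, e):
    """Transit duration in the units of `period`, for scaled semi-major axis `aor`."""
    return 0.25 * period * np.sqrt(1 - e ** 2) / aor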
def get_kodidb_setdata(metadatautils, set_id):
    '''get moviesetdetails from Kodi DB'''
    details = {}
    movieset = metadatautils.kodidb.movieset(set_id, FIELDS_MOVIES)
    count = 0
    runtime = 0
    unwatchedcount = 0
    watchedcount = 0
    writer = []
    director = []
    genre = []
    countries = []
    studio = []
    years = []
    plot = ""
    title_list = ""
    total_movies = len(movieset['movies'])
    title_header = "[B]%s %s[/B][CR]" % (total_movies, xbmc.getLocalizedString(20342))
    all_fanarts = []
    details["art"] = movieset["art"]
    movieset_movies = sorted(movieset['movies'], key=itemgetter("year"))
    for count, item in enumerate(movieset_movies):
        if item["playcount"] == 0:
            unwatchedcount += 1
        else:
            watchedcount += 1

        # generic labels
        for label in ["label", "plot", "year", "rating"]:
            details['%s.%s' % (count, label)] = item[label]
        details["%s.DBID" % count] = item["movieid"]
        details["%s.duration" % count] = item['runtime'] / 60

        # art labels
        art = item['art']
        for label in ["poster", "fanart", "landscape", "clearlogo", "clearart", "banner", "discart"]:
            if art.get(label):
                details['%s.art.%s' % (count, label)] = get_clean_image(art[label])
                if not movieset["art"].get(label):
                    movieset["art"][label] = get_clean_image(art[label])
        all_fanarts.append(get_clean_image(art.get("fanart")))

        # streamdetails
        if item.get('streamdetails', ''):
            streamdetails = item["streamdetails"]
            audiostreams = streamdetails.get('audio', [])
            videostreams = streamdetails.get('video', [])
            subtitles = streamdetails.get('subtitle', [])
            if len(videostreams) > 0:
                stream = videostreams[0]
                height = stream.get("height", "")
                width = stream.get("width", "")
                if height and width:
                    resolution = ""
                    if width <= 720 and height <= 480:
                        resolution = "480"
                    elif width <= 768 and height <= 576:
                        resolution = "576"
                    elif width <= 960 and height <= 544:
                        resolution = "540"
                    elif width <= 1280 and height <= 720:
                        resolution = "720"
                    elif width <= 1920 and height <= 1080:
                        resolution = "1080"
                    elif width * height >= 6000000:
                        resolution = "4K"
                    details["%s.resolution" % count] = resolution
                details["%s.Codec" % count] = stream.get("codec", "")
                if stream.get("aspect", ""):
                    details["%s.aspectratio" % count] = round(stream["aspect"], 2)
            if len(audiostreams) > 0:
                # grab details of first audio stream
                stream = audiostreams[0]
                details["%s.audiocodec" % count] = stream.get('codec', '')
                details["%s.audiochannels" % count] = stream.get('channels', '')
                details["%s.audiolanguage" % count] = stream.get('language', '')
            if len(subtitles) > 0:
                # grab details of first subtitle
                details["%s.SubTitle" % count] = subtitles[0].get('language', '')

        title_list += "%s (%s)[CR]" % (item['label'], item['year'])
        if item['plotoutline']:
            plot += "[B]%s (%s)[/B][CR]%s[CR][CR]" % (item['label'], item['year'], item['plotoutline'])
        else:
            plot += "[B]%s (%s)[/B][CR]%s[CR][CR]" % (item['label'], item['year'], item['plot'])
        runtime += item['runtime']
        if item.get("writer"):
            writer += [w for w in item["writer"] if w and w not in writer]
        if item.get("director"):
            director += [d for d in item["director"] if d and d not in director]
        if item.get("genre"):
            genre += [g for g in item["genre"] if g and g not in genre]
        if item.get("country"):
            countries += [c for c in item["country"] if c and c not in countries]
        if item.get("studio"):
            studio += [s for s in item["studio"] if s and s not in studio]
        years.append(str(item['year']))

    details["plots"] = plot
    if total_movies > 1:
        details["extendedplots"] = title_header + title_list + "[CR]" + plot
    else:
        details["extendedplots"] = plot
    details["titles"] = title_list
    details["runtime"] = runtime / 60
    details.update(get_duration(runtime / 60))
    details["writer"] = writer
    details["director"] = director
    details["genre"] = genre
    details["studio"] = studio
    details["years"] = years
    if len(years) > 1:
        details["year"] = "%s - %s" % (years[0], years[-1])
    else:
        details["year"] = years[0] if years else ""
    details["country"] = countries
    details["watchedcount"] = str(watchedcount)
    details["unwatchedcount"] = str(unwatchedcount)
    details.update(metadatautils.studiologos.get_studio_logo(studio, metadatautils.studiologos_path))
    details["count"] = total_movies
    details["art"]["fanarts"] = all_fanarts
    return details
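# Above, get_duration(runtime / 60) is expected to return a dict of
# display-ready duration fields that is merged straight into `details`.
# A sketch of such a helper, with hypothetical field names (minutes in,
# "h:mm" strings out; not necessarily the library's own keys):
def get_duration_sketch(duration_minutes):
    """Map a runtime in minutes to formatted duration fields."""
    hours, minutes = divmod(int(duration_minutes), 60)
    return {
        "Duration": "%s:%.2d" % (hours, minutes),
        "Duration.Hours": str(hours),
        "Duration.Minutes": str(minutes),
    }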
from utils import get_duration

ends = (
    '2018-11-20 00:00:01',
    '2018-11-20 00:01:00',
    '2018-11-20 00:02:02',
    '2018-11-20 03:00:04',
    '2018-11-20 03:04:05',
    '2018-11-21 00:00:00',
    '2018-11-22 16:00:00',
    '2018-11-23 00:56:00',
    '2018-11-27 00:14:45',
    '2018-12-30 12:34:56',
)

for end in ends:
    print(get_duration(start='2018-11-20 00:00:00', end=end))
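# The smoke test above expects utils.get_duration to take two
# 'YYYY-MM-DD HH:MM:SS' strings and return something printable describing the
# elapsed time. A sketch of a compatible implementation (an assumption, since
# the utils module itself is not shown):
from datetime import datetime

def get_duration_sketch(start, end):
    """Return the elapsed time between two timestamp strings as a timedelta."""
    fmt = '%Y-%m-%d %H:%M:%S'
    return datetime.strptime(end, fmt) - datetime.strptime(start, fmt)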
def main(corpus_dir, labels_dir, output_dir, sample_rate=16000,
         use_reference=False):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datadir = os.path.join(corpus_dir, 'core')
    speaker_map_filename = os.path.join(corpus_dir, 'doc', 'speakers')
    speaker_map = read_speaker_map(speaker_map_filename)

    wav_base = 'FILEID sox WAVPATH -r {0} -t .wav - |'.format(sample_rate)

    if use_reference:
        ref_dir = os.path.join(labels_dir, 'reference_labels',
                               'speaker_labels', 'lab')
        reference_list = [f.replace('.lab', '') for f in os.listdir(ref_dir)]

    speaker_utts = {}
    text, wav = [], []
    utt2spk, spk2utt = [], []
    utt2dur = []
    for subset in speaker_map:
        print('Processing {0} data'.format(subset))
        for speaker in speaker_map[subset]:
            speaker_dir = os.path.join(datadir, speaker)
            flist = [f for f in os.listdir(speaker_dir) if f.endswith('.wav')]
            for f in flist:
                f = f.replace('.wav', '')
                fileid = '-'.join([speaker, f])
                if use_reference:
                    if fileid not in reference_list:
                        continue

                # use prompt for text, although it will be ignored for decoding
                txt_f = os.path.join(speaker_dir, f + '.txt')
                with open(txt_f, 'r') as fid:
                    txt = fid.readline().rstrip()
                words = []
                for w in txt.split():
                    w = w.upper()
                    words.append(w)
                words = ' '.join([fileid] + words)
                text.append(words)

                # prepare wav.scp
                wavpath = os.path.join(speaker_dir, f + '.wav')
                file_wav = wav_base.replace('FILEID', fileid)
                file_wav = file_wav.replace('WAVPATH', wavpath)
                wav.append(file_wav)

                # prepare utt2dur
                dur = get_duration(wavpath)
                utt2dur.append('{0} {1}'.format(fileid, dur))

                # prepare utt2spk
                utt2spk.append('{0} {1}'.format(fileid, speaker))
                if speaker in speaker_utts:
                    speaker_utts[speaker].append(fileid)
                else:
                    speaker_utts[speaker] = [fileid]

    # prepare spk2utt
    for speaker in speaker_utts:
        spk_utts = '{0} {1}'.format(speaker,
                                    ' '.join(sorted(speaker_utts[speaker])))
        spk2utt.append(spk_utts)

    text_f = os.path.join(output_dir, 'text')
    wav_f = os.path.join(output_dir, 'wav.scp')
    utt2spk_f = os.path.join(output_dir, 'utt2spk')
    spk2utt_f = os.path.join(output_dir, 'spk2utt')
    utt2dur_f = os.path.join(output_dir, 'utt2dur')
    write_data(text, text_f)
    write_data(wav, wav_f)
    write_data(utt2spk, utt2spk_f)
    write_data(spk2utt, spk2utt_f)
    write_data(utt2dur, utt2dur_f)

    # validate data directory
    validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(
        output_dir)
    os.system(validate_cmd)
def process(self):
    # fill na
    for column in self.nanum_columns:
        print("Fill NA {}".format(column))
        self.df_all[column].fillna(-1, inplace=True)
    for column in self.nastr_columns:
        print("Fill NA {}".format(column))
        self.df_all[column].fillna("", inplace=True)

    # new features
    self.df_all["dstipscope_dominate"] = self.df_all.apply(
        lambda row: utils.get_ip_scope(row["dstipcategory_dominate"]), axis=1)
    self.df_all["srcipscope_dominate"] = self.df_all.apply(
        lambda row: utils.get_ip_scope(row["srcipcategory_dominate"]), axis=1)

    # ip zone features
    self.df_all["ip_zone_1"] = self.df_all.apply(
        lambda row: utils.get_ip_zone(row["ip"], 1), axis=1)
    self.df_all["ip_zone_2"] = self.df_all.apply(
        lambda row: utils.get_ip_zone(row["ip"], 2), axis=1)
    self.df_all["ip_zone_3"] = self.df_all.apply(
        lambda row: utils.get_ip_zone(row["ip"], 3), axis=1)
    self.df_all["ip_zone_4"] = self.df_all.apply(
        lambda row: utils.get_ip_zone(row["ip"], 4), axis=1)

    # concatenation features
    self.df_all["ip_zone_12"] = self.df_all.apply(
        lambda row: utils.concatenate_values(
            [row["ip_zone_1"], row["ip_zone_2"]]), axis=1)
    self.df_all["ip_zone_123"] = self.df_all.apply(
        lambda row: utils.concatenate_values(
            [row["ip_zone_1"], row["ip_zone_2"], row["ip_zone_3"]]), axis=1)
    self.df_all["ip_zone_34"] = self.df_all.apply(
        lambda row: utils.concatenate_values(
            [row["ip_zone_3"], row["ip_zone_4"]]), axis=1)
    self.df_all["ip_zone_234"] = self.df_all.apply(
        lambda row: utils.concatenate_values(
            [row["ip_zone_2"], row["ip_zone_3"], row["ip_zone_4"]]), axis=1)
    self.le_columns.append("ip_zone_12")
    self.le_columns.append("ip_zone_123")
    self.le_columns.append("ip_zone_34")
    self.le_columns.append("ip_zone_234")

    feature_pairs = [("categoryname", "ipcategory_scope"),
                     ("categoryname", "overallseverity"),
                     ("srcipscope_dominate", "dstipscope_dominate")]
    for f1, f2 in feature_pairs:
        fn = f1 + "_" + f2
        self.df_all[fn] = self.df_all.apply(
            lambda row: utils.concatenate_values([row[f1], row[f2]]), axis=1)
        self.le_columns.append(fn)

    # timestamp_dist in hour and minute
    self.df_all["timestamp_hour"] = self.df_all.apply(
        lambda row: utils.get_duration(row["timestamp_dist"]), axis=1)

    # ending time features
    self.df_all["end_hour"] = self.df_all.apply(
        lambda row: utils.get_end_time(
            row["start_hour"], row["start_minute"], row["start_second"],
            row["timestamp_dist"], "hour"), axis=1)
    self.df_all["end_minute"] = self.df_all.apply(
        lambda row: utils.get_end_time(
            row["start_hour"], row["start_minute"], row["start_second"],
            row["timestamp_dist"], "minute"), axis=1)
    self.df_all["end_second"] = self.df_all.apply(
        lambda row: utils.get_end_time(
            row["start_hour"], row["start_minute"], row["start_second"],
            row["timestamp_dist"], "second"), axis=1)

    # sum score features
    self.df_all["sum_score"] = self.df_all.apply(
        lambda row: utils.get_sum([
            row["{}score".format(score)]
            for score in ["untrust", "flow", "trust", "enforcement"]
        ]), axis=1)
    self.df_all["sum_n"] = self.df_all.apply(
        lambda row: utils.get_sum([row["n{}".format(i)] for i in range(1, 11)]),
        axis=1)
    self.df_all["sum_p5"] = self.df_all.apply(
        lambda row: utils.get_sum([row["p5{}".format(p5)] for p5 in ["m", "w", "d"]]),
        axis=1)
    self.df_all["sum_p8"] = self.df_all.apply(
        lambda row: utils.get_sum([row["p8{}".format(p8)] for p8 in ["m", "w", "d"]]),
        axis=1)
    # self.df_all["sum_p58"] = self.df_all.apply(lambda row: utils.get_sum([row["sum_p5"], row["sum_p8"]]), axis=1)

    # get ratio features
    # self.df_all["thrcnt_month_week"] = self.df_all.apply(lambda row: utils.get_ratio(row["thrcnt_month"], row["thrcnt_week"]), axis=1)
    self.df_all["thrcnt_month_day"] = self.df_all.apply(
        lambda row: utils.get_ratio(row["thrcnt_month"], row["thrcnt_day"]),
        axis=1)
    self.df_all["thrcnt_week_day"] = self.df_all.apply(
        lambda row: utils.get_ratio(row["thrcnt_week"], row["thrcnt_day"]),
        axis=1)

    # encode features with label encoder
    label_encoder = LabelEncoder()
    for column in self.le_columns:
        print("Label encoding {}".format(column))
        label_encoder.fit(self.df_all[column])
        self.df_all[column] = label_encoder.transform(self.df_all[column])

    # encode features with one-hot encoder
    for column in self.oe_columns:
        print("One-hot encoding {}".format(column))
        pd_encoded = pd.get_dummies(self.df_all[column])
        pd_encoded.columns = [
            "{}_{}".format(column, "_".join(str(col).lower().split()))
            for col in pd_encoded.columns
        ]
        self.df_all.drop(column, axis=1, inplace=True)
        self.df_all = pd.concat([self.df_all, pd_encoded], axis=1)