def computeDirDiff(role, s: PickleSocket):
    """Compare the flat files of the cwd between two peers over a PickleSocket.

    role: 'c' (client) sends each local filename and its hash, then receives
          the final diff list computed by the server.
          's' (server) receives filename/hash pairs, compares them with its
          own files, and sends back the names it still needs.
    s:    a connected PickleSocket used for object exchange.

    Returns the list of filenames that are missing or differ on the server.
    NOTE: the send/recv sequence of the two roles is a wire protocol — the
    two branches must stay exact mirrors of each other.
    """
    print('Evaluating situation...')
    list_dir = [x for x in os.listdir() if isfile(x)]
    if role == 'c':
        total = len(list_dir)
        s.sendObj(total)
        with Jdt(total, 'Compare Hash') as jdt:
            for filename in list_dir:
                jdt.acc()
                s.sendObj(filename)
                s.sendObj(hashFile(filename))
        return s.recvObj()
    elif role == 's':
        # Use a set: membership is tested once per local file below,
        # making the diff O(n) instead of O(n^2) (was a list).
        already_good = set()
        total = s.recvObj()
        with Jdt(total, 'Compare Hash') as jdt:
            for _ in range(total):
                jdt.acc()
                filename = s.recvObj()
                my_hash = None
                if isfile(filename):
                    my_hash = hashFile(filename)
                # Always receive the peer's hash, even when we lack the file,
                # to keep the protocol in lockstep.
                their_hash = s.recvObj()
                if my_hash is not None and their_hash == my_hash:
                    already_good.add(filename)
        result = [x for x in list_dir if x not in already_good]
        s.sendObj(result)
        return result
def main():
    """Write ../data2/calendar_estimate.csv: one estimate row per listing id.

    Loads the id list, writes the header, then delegates each row to
    oneListing() while ticking a Jdt progress bar.
    """
    with open('../data2/all_id.pickle', 'rb') as f:
        all_id = pickle.load(f)
    jdt = Jdt(len(all_id), UPP=128)
    with open('../data2/calendar_estimate.csv', 'w', newline='') as f:
        c = csv.writer(f)
        c.writerow([
            'id',
            'probability_closed',
            *[f'probability_unavailable_{x}' for x in X1],
            *[f'occupancy_rate_{x}' for x in X1],
        ])
        # A plain loop: the original built a throwaway list comprehension
        # purely for the side effects of oneListing() and jdt.acc().
        for x in all_id:
            oneListing(x, c)
            jdt.acc()
    jdt.complete()
    print('ok')
def main():
    """Extract the text columns of listings_details.csv into ./text.csv."""
    with open('../raw/listings_details.csv', 'r', encoding='utf-8') as fin:
        reader = csv.reader(fin)
        header_row = next(reader)
        # Map every wanted column name to its position in the input header.
        col_index = {name: header_row.index(name) for name in all_text}
        col_index['id'] = header_row.index('id')

        def lookup(row, x):
            # Fetch column `x` of `row` via the precomputed index map.
            return row[col_index[x]]

        with open('./text.csv', 'w+', newline='', encoding='utf-8') as fout:
            writer = csv.writer(fout)
            writer.writerow([
                'id',
                *all_text,
                *[wc(name) for name in need_word_count],
                'name_is_all_cap',
                'name_avg_word_len',
                'doc_id',
            ])
            # 50000 is an upper-bound guess for the progress bar total.
            with Jdt(50000, UPP=128) as jdt:
                for row in reader:
                    jdt.acc()
                    handle(row, writer, lookup)
def main():
    """Write ../data2/large_chunks.csv: per listing id, its largeChunks() values."""
    with open('../data2/all_id.pickle', 'rb') as pf:
        all_id = pickle.load(pf)
    header = ['id'] + [f'chunk_of_unavai_{i}' for i in range(4)]
    with Jdt(len(all_id), UPP=128) as progress, \
            open('../data2/large_chunks.csv', 'w+', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for listing_id in all_id:
            progress.acc()
            writer.writerow([listing_id, *largeChunks(listing_id)])
def main():
    """Reconcile viewed docs with images on disk.

    1. Classify every doc as SAVE or WOW by its recorded response.
    2. Offer to download images for SAVE docs whose image is missing.
    3. Download first images of WOW docs into a user-chosen folder.
    """
    all_imgs = [x.split('_')[0] for x in listAll(IMGS)]
    print(len(all_imgs), 'images on disk')
    all_docs = listAll(DOCS)
    print(len(all_docs), 'docs viewed')
    wows = []
    saves = []
    with Jdt(len(all_docs), 'filtering', UPP=4) as j:
        for doc_id in all_docs:
            j.acc()
            doc: Doc = loadDoc(doc_id)
            if doc.response == RES_SAVE:
                saves.append(doc)
            elif doc.response == RES_WOW:
                wows.append(doc)
    lacks = {x.id for x in saves} - set(all_imgs)
    if lacks:
        print('Document response is SAVE but image not on disk:')
        # Plain loop (not a throwaway comprehension) for the side effect.
        for missing in lacks:
            print(missing)
        print('Download now?')
        if input('y/n ').lower() == 'y':
            for missing_id in lacks:  # renamed: `id` shadowed the builtin
                # next() stops at the first match instead of scanning the
                # whole list and indexing [0].
                doc = next(x for x in saves if x.id == missing_id)
                print('Getting', doc.id, '...')
                imgs = [getImage(x) for x in doc.img_urls]
                saveImg(doc, imgs)
            print('Complete.')
    p = input('Input path to download to: ')
    fns = [(osp.join(p, doc.id) + '.' + doc.img_type, doc) for doc in wows]
    missing_fns = [x for x in fns if not osp.exists(x[0])]
    print(f'There are {len(fns)} wow ones. {len(missing_fns)} are not on disk.')
    with Jdt(len(missing_fns)) as j:
        for fn, doc in missing_fns:
            j.acc()
            img = getImage(doc.img_urls[0])
            with open(fn, 'wb+') as f:
                f.write(img)
    input('Ok. Press enter to quit...')
def main():
    """Clean pilot_features.csv into cleaned.csv.

    Normalizes host response rates, counts verifications, converts weekly /
    monthly prices into discounts, fills empty numeric columns with defaults,
    and expands the `amenities` column into one boolean column per amenity.
    Rows that fail to clean are logged to error.log by id and skipped.
    """
    with open('../data2/pilot_features.csv', 'r', encoding='utf-8') as f:
        c = csv.reader(f)
        head = next(c)
        with open('../data2/cleaned.csv', 'w+', encoding='utf-8', newline='') as outF:
            out = csv.writer(outF)
            amen_col = head.index('amenities')
            rate_col = head.index('host_response_rate')
            veri_col = head.index('host_verifications')
            # pric_col points one column BEFORE weekly_price so that the
            # 3-slice below covers [price, weekly_price, monthly_price].
            pric_col = head.index('weekly_price') - 1
            # Columns defaulted to '0' / '1' when empty.
            fill_0 = [
                head.index(x) for x in ['security_deposit', 'cleaning_fee']
            ]
            fill_1 = [
                head.index(x) for x in [
                    'host_listings_count', 'host_total_listings_count',
                    'bedrooms', 'beds'
                ]
            ]
            outHead = head[:]
            outHead[veri_col] = 'num_of_host_verifications'
            outHead[pric_col + 1] = 'weekly_discount'
            outHead[pric_col + 2] = 'monthly_discount'
            out.writerow(outHead[:amen_col] + ['amen_' + x for x in ALL_AMEN]
                         + outHead[amen_col + 1:])
            with Jdt(50000, 'cleaning', UPP=256) as jdt:
                for row in c:
                    jdt.acc()
                    try:
                        row[rate_col] = rate(row[rate_col])
                        row[rate_col + 1] = rate(row[rate_col + 1])
                        row[veri_col] = verificaitons(row[veri_col])
                        row[pric_col:pric_col + 3] = prices(*row[pric_col:pric_col + 3])
                        for col in fill_0:
                            if not row[col]:
                                row[col] = '0'
                        for col in fill_1:
                            if not row[col]:
                                row[col] = '1'
                        row = row[:amen_col] + amen(
                            row[amen_col]) + row[amen_col + 1:]
                        out.writerow(row)
                    # Was a bare `except:` — that also swallowed
                    # KeyboardInterrupt/SystemExit, making the run unkillable.
                    except Exception:
                        # `logf` (was `f`) no longer shadows the input handle.
                        with open('error.log', 'a') as logf:
                            print(row[0], file=logf)
                        import traceback
                        traceback.print_exc()
def overallLoss(x, which_end, msg=''):
    """Sum lossFile() over every file in list_dir, append the mean loss to
    m2_x1x2_result.csv, and return the total."""
    progress = Jdt(len_list_dir, msg=msg, UPP=128)
    total = 0
    for fn in list_dir:
        progress.acc()
        total += lossFile(fn.split('.')[0], x, which_end)
    progress.complete()
    with open('m2_x1x2_result.csv', 'a') as result_f:
        print(x, which_end, total / len_list_dir, sep=',', file=result_f)
    return total
def main():
    """Score every tag against the overall baseline and write a summary CSV."""
    print('ls...')
    overall = loadOverall()
    baseline = score(overall)
    tagInfos = []
    la = listAll(TAGS)
    with Jdt(len(la), 'Evaluating...', UPP=8) as j:
        for i in la:
            j.acc()
            ti = load(i)
            total = sum(ti.n_responses.get(r, 0) for r in ALL_RESPONSES)
            # Artists need more responses before they count; everything
            # else needs at least 10.
            min_total = ARTIST_TOTAL_THRESHOLD if ti.type == 'artist' else 10
            if total < min_total:
                continue
            try:
                s = score(ti.n_responses) - baseline
            except ZeroDivisionError:
                s = 0
            # Only keep tags whose score deviates noticeably from baseline.
            if abs(s) > 0.2:
                tagInfos.append((ti, s))
    print('Sorting...')
    tagInfos.sort(key=lambda pair: pair[1])
    print()
    FILENAME = 'tags_summary.csv'
    print(f'writing {FILENAME}...')
    with open(FILENAME, 'w', encoding='utf-8', newline='') as f:
        c = csv.writer(f)
        c.writerow(['Score', 'Type', 'Display', 'Name'])
        for ti, s in tagInfos:
            row = [format(s, '+.1f'), str(ti.type), ti.display, ti.name]
            c.writerow(row)
    # Read the file back and pretty-print it to the console.
    with open(FILENAME, 'r', encoding='utf-8') as f:
        table = [*csv.reader(f)]
    printTable(table)
def main():
    """Scrape an HTML directory listing and download every entry into a
    fresh date-named folder under DEST."""
    res = get(HOST + PATH)
    os.chdir(DEST)
    dir_name = datetime.today().strftime('%Y-%m-%d')
    os.mkdir(dir_name)
    os.chdir(dir_name)
    # Each link is delimited by LEFT ... RIGHT in the page source.
    parts = res.text.split(LEFT)[1:]
    with Jdt(len(parts), 'Downloading') as j:
        for part in parts:
            fullname = part.split(RIGHT, 1)[0]
            filename = path.split(fullname)[1]
            # Skip the parent-directory entry.
            if filename != '..':
                res = get(HOST + fullname)
                with open(filename, 'wb') as f:
                    f.write(res.content)
            j.acc()
    input('Done! Enter...')
def getListOfFrames(folder, duration_of_interest, region_of_interest):
    """Load and crop a sequence of image files from `folder`.

    folder:               directory containing the frames.
    duration_of_interest: iterable of image filenames to load (in order).
    region_of_interest:   4-tuple box passed to PIL's Image.crop.

    Returns a list of numpy arrays (one per successfully opened image).
    Files that fail to open are skipped with a warning.
    """
    # Remember and restore the cwd instead of the original os.chdir('..'),
    # which leaked the directory change on error and assumed `folder`
    # was exactly one level deep.
    prev_dir = os.getcwd()
    os.chdir(folder)
    try:
        frames = []
        jdt = Jdt(len(duration_of_interest), 'reading', UPP=8)
        for filename in duration_of_interest:
            jdt.acc()
            try:
                image = Image.open(filename)
            # Was a bare `except:` — that also caught KeyboardInterrupt.
            except Exception:
                print('Warning: Failed to open file', filename)
                continue
            frames.append(np.array(image.crop(region_of_interest)))
        jdt.complete()
    finally:
        os.chdir(prev_dir)
    return frames
def main():
    """Join listings.csv with calendar_estimate.csv (both ordered by listing
    id) and write per-listing review-recency z-scores and occupancy-derived
    y values to review_rate_regression.csv."""
    with open('../raw/listings.csv', 'r', encoding='utf-8') as listingF:
        listingC = csv.reader(listingF)
        next(listingC)  # skip header
        with open('../data2/calendar_estimate.csv', 'r') as estimateF:
            estimateC = csv.reader(estimateF)
            next(estimateC)  # skip header
            with open('../data2/review_rate_regression.csv', 'w+', newline='') as outF:
                out = csv.writer(outF)
                out.writerow([
                    'id',
                    'is_review_recent_z_score',
                    'reviews_per_month',
                    *[f'y_{x}' for x in X1],
                    'length_of_stay',
                ])
                # Date the data set was scraped; blanks are measured from here.
                scrape_day = datetime.datetime.strptime('2/12/2020', '%m/%d/%Y')
                with Jdt(len(getAllId()), UPP=16) as j:
                    for id2, _, *estimate_line in estimateC:
                        j.acc()
                        # Advance the listings reader until the matching id
                        # (both files share the same id order).
                        # BUGFIX: the original used `while True: ... else:`,
                        # whose else-branch can never run, so an exhausted
                        # reader crashed with a bare StopIteration instead of
                        # the intended error below. for/else does it right.
                        for line in listingC:
                            if line[0] == id2:
                                break
                        else:
                            raise Exception('error 328')
                        listing_id = line[0]  # renamed: `id` shadowed builtin
                        min_nights, _, last_review, rpm = line[10:14]
                        occupancy = estimate_line[len(X1):]
                        length_of_stay = max(int(min_nights), AVG_LENGTH)
                        y = [float(x) / length_of_stay for x in occupancy]
                        if last_review == '':
                            z_score = 999  # sentinel: listing has no reviews
                        else:
                            date = datetime.datetime.strptime(last_review, '%Y-%m-%d')
                            blank = (scrape_day - date).days
                            if blank < 3:
                                z_score = 0
                            else:
                                # Days since last review, in units of the
                                # expected gap implied by reviews-per-month.
                                z_score = blank / ((365.25 / 12) / float(rpm))
                        out.writerow([listing_id, z_score, rpm, *y, length_of_stay])
    print('ok')
def main():
    """Rename every file in a dragged folder to 0.ext, 1.ext, ... and run
    ffmpeg to assemble them into ../<folder-name>.mp4 at 30 fps."""
    folder = input('Drag folder/photo here and press Enter: ')
    # Drag-and-drop on some shells backslash-escapes spaces and appends a
    # trailing space; undo both.
    folder = folder.replace('\\', '').rstrip(' ')
    # BUGFIX: if a photo (a file) was dragged, the original called os.chdir
    # on the file path and crashed; operate on its containing folder instead.
    if not isdir(folder):
        folder = dirname(folder)
    os.chdir(folder)
    files = os.listdir()
    files.sort()
    n = len(files)
    # Explicit check instead of `assert` (asserts are stripped under -O).
    if input(f'There are {n} files here. Proceed? y/n > ').lower() != 'y':
        raise SystemExit('Aborted.')
    with Jdt(n) as j:
        for i, fn in enumerate(files):
            _, ext = splitext(fn)
            os.rename(fn, f'{i}{ext}')
            j.acc()
    print('Rename complete. ')
    base = basename(folder)
    command = f'{FFMPEG} -r 30 -i %d.jpg ../{base}.mp4'
    print(command)
    os.system(command)
    print('Success.')
def main():
    """Merge per-job sentiment CSVs and topic CSVs into advanced.csv.

    Reads the N_JOBS sentiment readers and N_JOBS topic readers in lockstep,
    asserts their doc_ids agree, and writes one combined row per document.
    """
    sentiCs = []
    topicCs = []
    all_files = []
    for job in jobs:
        f = open(f'sent_df.{job}.csv', 'r')
        all_files.append(f)
        c = csv.reader(f)
        sentiCs.append(c)
        next(c)  # skip header
        f = open(f'output.{job}.csv', 'r')
        all_files.append(f)
        c = csv.reader(f)
        topicCs.append(c)
        next(c)  # skip header
    try:
        with open('advanced.csv', 'w+', newline='') as f:
            cout = csv.writer(f)
            outHead = [f'polarity_{x}' for x in jobs]
            for job in jobs:
                for i in range(1, 21):
                    outHead.append(f'topic_{job}_{i}')
            cout.writerow(outHead + ['doc_id'])
            with Jdt(51094, UPP=128) as jdt:
                for x in zip(*sentiCs, *topicCs):
                    jdt.acc()
                    senti = x[:N_JOBS]
                    topic = x[N_JOBS:]
                    builder = []
                    doc_id = senti[0][2]
                    for sen in senti:
                        builder.append(sen[1])
                        assert doc_id == sen[2]
                    for top in topic:
                        # (debug print of top[0]/doc_id removed)
                        assert top[0] == doc_id
                        builder.extend(top[1:21])
                    # BUGFIX: the original assembled `builder` but never
                    # wrote it, producing a header-only output file.
                    cout.writerow(builder + [doc_id])
    finally:
        # Close inputs even on error (was a side-effect list comprehension).
        for handle in all_files:
            handle.close()
# Yield a result for each half of the page yield PageMerge().add(src, viewrect=(0, 0, ratio, 1)).render() yield PageMerge().add(src, viewrect=(ratio, 0, 1 - ratio, 1)).render() if __name__ == '__main__': inp = sys.argv[1:] if inp == []: inpfn = input('path/file.ext = ').strip('"') else: inpfn, = inp outfn = os.path.join(os.path.dirname(inpfn), 'unspread.' + os.path.basename(inpfn)) writer = PdfWriter(outfn) pages = PdfReader(inpfn).pages ratio = input('Ratio (default 0.5) = ') if ratio == '': mySplitpage = splitpage else: ratio = float(ratio) mySplitpage = lambda page: splitpage(page, ratio) jdt = Jdt(len(pages)) for i, page in enumerate(pages): writer.addpages(mySplitpage(page)) jdt.acc() writer.write() jdt.complete() print('start briss? ') if listen(['y', 'n']) == b'y': cmd('briss.lnk "%s"' % outfn)
# from IPython import embed ''' Test showed: mean(Canny(img, 100, 150)) either < 3 or > 15. We will use 9 as threshold. ... 2019/1/23: For a diff dataset, it's diff. ''' THRESHOLD = .1 # Change this value, if we have too many "blurred" # THRESHOLD is the threshold for blurness of img. For different lighting, placement of dish, or photo sizes, this needs to be re-calibrated. RAW = "E:/201127" # Change this to the folder name assert input(f'Are we looking at {RAW}? y/n > ') == 'y' SKIP = 1 # skip frame print(f'Taking 1 frame per {SKIP} frames') list_filename = listdir(RAW) list_filename.sort() jdt = Jdt(len(list_filename), 'Slime', UPP=8) sample = cv2.imread(RAW + '/' + list_filename[0]) resolution = tuple(reversed(sample.shape[:2])) fourcc = cv2.VideoWriter_fourcc(*'DIVX') out = cv2.VideoWriter('output.avi', fourcc, 20.0, resolution, True) lastClear = None i = -1 for filename in list_filename: jdt.acc() i = (i + 1) % SKIP if i != 0: continue raw = cv2.imread(RAW + '/' + filename) # canny = cv2.Canny(raw, 100, 150) # blur = mean(canny) # if blur < THRESHOLD: