Example #1
def computeDirDiff(role, s: PickleSocket):
    print('Evaluating situation...')
    list_dir = [x for x in os.listdir() if isfile(x)]
    if role == 'c':
        total = len(list_dir)
        s.sendObj(total)
        with Jdt(total, 'Compare Hash') as jdt:
            for filename in list_dir:
                jdt.acc()
                s.sendObj(filename)
                s.sendObj(hashFile(filename))
        return s.recvObj()
    elif role == 's':
        already_good = []
        total = s.recvObj()
        with Jdt(total, 'Compare Hash') as jdt:
            for _ in range(total):
                jdt.acc()
                filename = s.recvObj()
                my_hash = None
                if isfile(filename):
                    my_hash = hashFile(filename)
                their_hash = s.recvObj()
                if my_hash is not None and their_hash == my_hash:
                    already_good.append(filename)
        result = [x for x in list_dir if x not in already_good]
        s.sendObj(result)
        return result
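Every snippet on this page drives the same Jdt progress tracker: construct it with a total count and an optional message, call acc() once per unit of work, and either call complete() or use it as a context manager. For readers without the original package, here is a minimal sketch of that interface inferred purely from the calls in these examples; the constructor defaults, the meaning of UPP (assumed here to be "updates per print", i.e. redraw every UPP-th acc()), and the rendering are guesses, not the real implementation.

class Jdt:
    # Minimal stand-in inferred from usage in the examples below; not the real Jdt.
    def __init__(self, total, msg='', UPP=1):
        self.total = total
        self.msg = msg
        self.UPP = UPP          # assumed: redraw the bar every UPP-th acc()
        self.done = 0

    def acc(self):
        # Count one finished unit of work and occasionally redraw the line.
        self.done += 1
        if self.done % self.UPP == 0 or self.done == self.total:
            print(f'\r{self.msg} {self.done}/{self.total}', end='', flush=True)

    def complete(self):
        # Final redraw, then end the line.
        print(f'\r{self.msg} {self.total}/{self.total}')

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.complete()
        return False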
Example #2
def main():
    with open('../data2/all_id.pickle', 'rb') as f:
        all_id = pickle.load(f)
    jdt = Jdt(len(all_id), UPP=128)
    with open('../data2/calendar_estimate.csv', 'w', newline='') as f:
        c = csv.writer(f)
        c.writerow([
            'id',
            'probability_closed',
            *[f'probability_unavailable_{x}' for x in X1],
            *[f'occupancy_rate_{x}' for x in X1],
        ])
        for x in all_id:
            oneListing(x, c)
            jdt.acc()
    jdt.complete()
    print('ok')
Example #3
def main():
    with open('../raw/listings_details.csv', 'r', encoding='utf-8') as fin:
        cin = csv.reader(fin)
        header_in = next(cin)
        header_in_dict = {x: header_in.index(x) for x in all_text}
        header_in_dict['id'] = header_in.index('id')

        def lookup(row, x):
            return row[header_in_dict[x]]

        with open('./text.csv', 'w+', newline='', encoding='utf-8') as fout:
            cout = csv.writer(fout)
            header_out = [
                'id',
                *all_text,
                *[wc(x) for x in need_word_count],
                'name_is_all_cap',
                'name_avg_word_len',
                'doc_id',
            ]
            cout.writerow(header_out)
            with Jdt(50000, UPP=128) as jdt:
                for row in cin:
                    jdt.acc()
                    handle(row, cout, lookup)
Example #4
def main():
    with open('../data2/all_id.pickle', 'rb') as f:
        all_id = pickle.load(f)
    with Jdt(len(all_id), UPP=128) as jdt:
        with open('../data2/large_chunks.csv', 'w+', newline='') as f:
            c = csv.writer(f)
            c.writerow([
                'id',
                *[f'chunk_of_unavai_{x}' for x in range(4)],
            ])
            for x in all_id:
                jdt.acc()
                c.writerow([x, *largeChunks(x)])
Example #5
def main():
  all_imgs = [x.split('_')[0] for x in listAll(IMGS)]
  print(len(all_imgs), 'images on disk')
  all_docs = listAll(DOCS)
  print(len(all_docs), 'docs viewed')
  wows = []
  saves = []
  with Jdt(len(all_docs), 'filtering', UPP=4) as j:
    for doc_id in all_docs:
      j.acc()
      doc: Doc = loadDoc(doc_id)
      if doc.response == RES_SAVE:
        saves.append(doc)
      elif doc.response == RES_WOW:
        wows.append(doc)
  lacks = set([x.id for x in saves]) - set(all_imgs)
  if lacks:
    print('Document response is SAVE but image not on disk:')
    for x in lacks:
      print(x)
    print('Download now?')
    if input('y/n ').lower() == 'y':
      for id in lacks:
        doc = [x for x in saves if x.id == id][0]
        print('Getting', doc.id, '...')
        imgs = [getImage(x) for x in doc.img_urls]
        saveImg(doc, imgs)
      print('Complete.')
  p = input('Input path to download to: ')
  fns = [(osp.join(p, doc.id) + '.' + doc.img_type, doc) for doc in wows]
  missing_fns = [x for x in fns if not osp.exists(x[0])]
  print(f'There are {len(fns)} wow ones. {len(missing_fns)} are not on disk.')
  with Jdt(len(missing_fns)) as j:
    for fn, doc in missing_fns:
      j.acc()
      img = getImage(doc.img_urls[0])
      with open(fn, 'wb+') as f:
        f.write(img)
  input('Ok. Press enter to quit...')
Example #6
File: clean.py  Project: Daniel-Chin/airbnb
def main():
    with open('../data2/pilot_features.csv', 'r', encoding='utf-8') as f:
        c = csv.reader(f)
        head = next(c)
        with open('../data2/cleaned.csv', 'w+', encoding='utf-8',
                  newline='') as outF:
            out = csv.writer(outF)
            amen_col = head.index('amenities')
            rate_col = head.index('host_response_rate')
            veri_col = head.index('host_verifications')
            pric_col = head.index('weekly_price') - 1
            fill_0 = [
                head.index(x) for x in ['security_deposit', 'cleaning_fee']
            ]
            fill_1 = [
                head.index(x) for x in [
                    'host_listings_count', 'host_total_listings_count',
                    'bedrooms', 'beds'
                ]
            ]
            outHead = head[:]
            outHead[veri_col] = 'num_of_host_verifications'
            outHead[pric_col + 1] = 'weekly_discount'
            outHead[pric_col + 2] = 'monthly_discount'
            out.writerow(outHead[:amen_col] + ['amen_' + x for x in ALL_AMEN] +
                         outHead[amen_col + 1:])
            with Jdt(50000, 'cleaning', UPP=256) as jdt:
                for row in c:
                    jdt.acc()
                    try:
                        row[rate_col] = rate(row[rate_col])
                        row[rate_col + 1] = rate(row[rate_col + 1])
                        row[veri_col] = verificaitons(row[veri_col])
                        row[pric_col:pric_col +
                            3] = prices(*row[pric_col:pric_col + 3])
                        for col in fill_0:
                            if not row[col]:
                                row[col] = '0'
                        for col in fill_1:
                            if not row[col]:
                                row[col] = '1'
                        row = row[:amen_col] + amen(
                            row[amen_col]) + row[amen_col + 1:]
                        out.writerow(row)
                    except Exception:
                        with open('error.log', 'a') as f:
                            print(row[0], file=f)
                            import traceback
                            traceback.print_exc()
Example #7
def overallLoss(x, which_end, msg=''):
    jdt = Jdt(len_list_dir, msg=msg, UPP=128)
    acc = 0
    for filename in list_dir:
        jdt.acc()
        acc += lossFile(filename.split('.')[0], x, which_end)
    jdt.complete()
    with open('m2_x1x2_result.csv', 'a') as f:
        print(x, which_end, acc / len_list_dir, sep=',', file=f)
    return acc
Example #8
def main():
    print('ls...')
    overall = loadOverall()
    baseline = score(overall)
    tagInfos = []
    la = listAll(TAGS)
    with Jdt(len(la), 'Evaluating...', UPP=8) as j:
        for i in la:
            j.acc()
            ti = load(i)
            total = 0
            for r in ALL_RESPONSES:
                total += ti.n_responses.get(r, 0)
            if ti.type == 'artist':
                if total < ARTIST_TOTAL_THRESHOLD:
                    continue
            else:
                if total < 10:
                    continue
            try:
                s = score(ti.n_responses) - baseline
            except ZeroDivisionError:
                s = 0
            if abs(s) > 0.2:
                tagInfos.append((ti, s))
    print('Sorting...')
    tagInfos.sort(key=lambda x: x[1])
    print()
    FILENAME = 'tags_summary.csv'
    print(f'writing {FILENAME}...')
    header = ['Score', 'Type', 'Display', 'Name']
    with open(FILENAME, 'w', encoding='utf-8', newline='') as f:
        c = csv.writer(f)
        c.writerow(header)
        for ti, s in tagInfos:
            c.writerow([
                format(s, '+.1f'),
                str(ti.type),
                ti.display,
                ti.name,
            ])
    with open(FILENAME, 'r', encoding='utf-8') as f:
        c = csv.reader(f)
        table = [*c]
    printTable(table)
Example #9
def main():
    res = get(HOST + PATH)
    os.chdir(DEST)
    dir_name = datetime.today().strftime('%Y-%m-%d')
    os.mkdir(dir_name)
    os.chdir(dir_name)
    parts = res.text.split(LEFT)[1:]
    with Jdt(len(parts), 'Downloading') as j:
        for part in parts:
            fullname = part.split(RIGHT, 1)[0]
            _, filename = path.split(fullname)
            if filename == '..':
                j.acc()
                continue
            # print(filename)
            res = get(HOST + fullname)
            with open(filename, 'wb') as f:
                f.write(res.content)
            j.acc()
    input('Done! Enter...')
Example #10
def getListOfFrames(folder, duration_of_interest, region_of_interest):
    os.chdir(folder)
    frames = []
    jdt = Jdt(len(duration_of_interest), 'reading', UPP=8)
    for filename in duration_of_interest:
        jdt.acc()
        try:
            image = Image.open(filename)
        except Exception:
            print('Warning: Failed to open file', filename)
            continue
        frames.append(np.array(image.crop(region_of_interest)))
    jdt.complete()
    os.chdir('..')
    return frames
Example #11
def main():
  with open('../raw/listings.csv', 'r', encoding='utf-8') as listingF:
    listingC = csv.reader(listingF)
    next(listingC)
    with open('../data2/calendar_estimate.csv', 'r') as estimateF:
      estimateC = csv.reader(estimateF)
      next(estimateC)
      with open('../data2/review_rate_regression.csv', 'w+', newline='') as outF:
        out = csv.writer(outF)
        out.writerow([
          'id', 'is_review_recent_z_score', 'reviews_per_month', 
          *[f'y_{x}' for x in X1], 
          'length_of_stay', 
        ])
        scrape_day = datetime.datetime.strptime('2/12/2020', '%m/%d/%Y')
        with Jdt(len(getAllId()), UPP=16) as j:
          for id2, _, *estimate_line in estimateC:
            j.acc()
            # Scan forward in listings.csv to the row whose id matches id2.
            # (The original `while True ... else` left the else branch unreachable.)
            for line in listingC:
              id = line[0]
              if id == id2:
                break
            else:
              raise Exception('error 328')
            min_nights, _, last_review, rpm = line[10:14]
            occupancy = estimate_line[len(X1):]
            length_of_stay = max(int(min_nights), AVG_LENGTH)
            y = [float(x) / length_of_stay for x in occupancy]
            if last_review == '':
              z_score = 999
            else:
              date = datetime.datetime.strptime(last_review, '%Y-%m-%d')
              blank = (scrape_day - date).days
              if blank < 3:
                z_score = 0
              else:
                z_score = blank / ((365.25 / 12) / float(rpm))
            out.writerow([id, z_score, rpm, *y, length_of_stay])
  print('ok')
Example #12
def main():
    folder = input('Drag folder/photo here and press Enter: ')
    folder = folder.replace('\\', '').rstrip(' ')
    if not isdir(folder):
        # A single photo was dragged in; work in its containing folder instead.
        folder = dirname(folder)
    os.chdir(folder)
    files = os.listdir()
    files.sort()
    n = len(files)
    assert 'y' == input(f'There are {n} files here. Proceed? y/n > ').lower()
    with Jdt(n) as j:
        for i, fn in enumerate(files):
            _, ext = splitext(fn)
            os.rename(fn, f'{i}{ext}')
            j.acc()
    print('Rename complete. ')
    os.chdir(folder)
    base = basename(folder)
    command = f'{FFMPEG} -r 30 -i %d.jpg ../{base}.mp4'
    print(command)
    os.system(command)
    print('Success.')
Example #13
def main():
    sentiCs = []
    topicCs = []
    all_files = []
    for job in jobs:
        f = open(f'sent_df.{job}.csv', 'r')
        all_files.append(f)
        c = csv.reader(f)
        sentiCs.append(c)
        next(c)
        f = open(f'output.{job}.csv', 'r')
        all_files.append(f)
        c = csv.reader(f)
        topicCs.append(c)
        next(c)
    with open('advanced.csv', 'w+', newline='') as f:
        cout = csv.writer(f)
        outHead = [f'polarity_{x}' for x in jobs]
        for job in jobs:
            for i in range(1, 21):
                outHead.append(f'topic_{job}_{i}')
        cout.writerow(outHead + ['doc_id'])
        with Jdt(51094, UPP=128) as jdt:
            for x in zip(*sentiCs, *topicCs):
                jdt.acc()
                senti = x[:N_JOBS]
                topic = x[N_JOBS:]
                builder = []
                doc_id = senti[0][2]
                for sen in senti:
                    builder.append(sen[1])
                    assert doc_id == sen[2]
                for top in topic:
                    print(top[0], doc_id)
                    assert top[0] == doc_id
                    builder.extend(top[1:21])
                # The header written above defines these columns plus doc_id,
                # so write the assembled row here (missing in the scraped snippet).
                cout.writerow(builder + [doc_id])
        for x in all_files:
            x.close()
Example #14
# The scraped snippet starts mid-function; the enclosing def is inferred from
# the calls to splitpage(page) and splitpage(page, ratio) further down.
def splitpage(src, ratio=0.5):
    # Yield a result for each half of the page
    yield PageMerge().add(src, viewrect=(0, 0, ratio, 1)).render()
    yield PageMerge().add(src, viewrect=(ratio, 0, 1 - ratio, 1)).render()


if __name__ == '__main__':
    inp = sys.argv[1:]
    if inp == []:
        inpfn = input('path/file.ext = ').strip('"')
    else:
        inpfn, = inp
    outfn = os.path.join(os.path.dirname(inpfn),
                         'unspread.' + os.path.basename(inpfn))
    writer = PdfWriter(outfn)
    pages = PdfReader(inpfn).pages
    ratio = input('Ratio (default 0.5) = ')
    if ratio == '':
        mySplitpage = splitpage
    else:
        ratio = float(ratio)
        mySplitpage = lambda page: splitpage(page, ratio)
    jdt = Jdt(len(pages))
    for i, page in enumerate(pages):
        writer.addpages(mySplitpage(page))
        jdt.acc()
    writer.write()
    jdt.complete()
    print('start briss? ')
    if listen(['y', 'n']) == b'y':
        cmd('briss.lnk "%s"' % outfn)
Example #15
# from IPython import embed
'''
Tests showed: mean(Canny(img, 100, 150)) is either < 3 or > 15,
so we use 9 as the threshold.
... 2019/1/23: For a different dataset, the threshold is different.
'''
THRESHOLD = .1  # Raise or lower this if too many frames come out "blurred"
# THRESHOLD is the blurriness cutoff for an image. It needs to be re-calibrated
# for different lighting, dish placement, or photo sizes.
RAW = "E:/201127"  # Change this to the folder name
assert input(f'Are we looking at {RAW}? y/n > ') == 'y'
SKIP = 1  # skip frame
print(f'Taking 1 frame per {SKIP} frames')

list_filename = listdir(RAW)
list_filename.sort()
jdt = Jdt(len(list_filename), 'Slime', UPP=8)
sample = cv2.imread(RAW + '/' + list_filename[0])
resolution = tuple(reversed(sample.shape[:2]))
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, resolution, True)
lastClear = None
i = -1
for filename in list_filename:
    jdt.acc()
    i = (i + 1) % SKIP
    if i != 0:
        continue
    raw = cv2.imread(RAW + '/' + filename)
    # canny = cv2.Canny(raw, 100, 150)
    # blur = mean(canny)
    # if blur < THRESHOLD:
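The scraped snippet cuts off right where the commented-out blur filter would run. Judging from the docstring at the top and the lastClear / out variables already set up, the rest of the loop was presumably along these lines; this is a hedged reconstruction, not the original code.

    # Reconstruction (assumed): treat frames with few Canny edges as blurred
    # and substitute the last clear frame so the output video does not flicker.
    canny = cv2.Canny(raw, 100, 150)
    blur = canny.mean()
    if blur < THRESHOLD:
        frame = lastClear if lastClear is not None else raw
    else:
        lastClear = raw
        frame = raw
    out.write(frame)

# Also assumed: finalize the writer and the progress bar after the loop.
out.release()
jdt.complete()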