예제 #1
0
    def handle(self, *args, **kwargs):
        """Scrape every stale source and record how each attempt went.

        Verifies that the SUPERFASTMATCH and DEFAULT_DOCTYPE settings exist,
        stores the client returned by ``from_django_conf()`` on ``self.sfm``
        and then walks all ``Source`` rows with ``source_type=2``.  A stale
        source is scraped, stamped with the current time and saved, and its
        unresolved ``SourceScrapeFailure`` rows are marked resolved.  Any
        error raised while processing one source is stored as a failure
        record and the loop moves on to the next source.

        Raises:
            CommandError: if one of the two required settings is missing.
        """
        required_settings = (
            ('SUPERFASTMATCH',
             'You must configure SUPERFASTMATCH in your project settings.'),
            ('DEFAULT_DOCTYPE',
             'You must specify a DEFAULT_DOCTYPE in your project settings.'),
        )
        for setting_name, complaint in required_settings:
            if not hasattr(settings, setting_name):
                raise CommandError(complaint)

        self.sfm = from_django_conf()

        for source in Source.objects.filter(source_type=2):
            try:
                if not source.is_stale():
                    continue

                self.scrape_releases(source)
                source.last_retrieved = now()
                source.save()

                # A successful scrape closes every failure still open for
                # this source.
                pending = SourceScrapeFailure.objects.filter(resolved__isnull=True,
                                                             source=source)
                for pending_failure in pending:
                    pending_failure.resolved = now()
                    pending_failure.save()

            except SourceScrapeFailure as scrape_failure:
                # The raised object is itself the failure record; persist it.
                scrape_failure.save()

            except Exception as exc:
                SourceScrapeFailure.objects.create(source=source,
                                                   description=unicode(exc))
def download(url, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Fetch a CSV, stack the requested value columns into long form and save.

    Args:
        url: Location handed to ``openurl.openurl``.
        reqFields: Sequence of strings (upper-cased before use).  Items 0 and
            1 become the constant values of ``col[8]`` and ``col[9]``; the
            remaining items name the CSV columns to extract, or ``'ALL'`` as
            item 2 to take every CSV column from the ninth onward.
        outPath: Destination passed on to ``dsave.save``.
        col: Ten output column names.  ``col[0..4]`` are also read from the
            CSV as-is; ``col[5]``/``col[6]`` receive the two ``'_'``-separated
            parts of each requested column name and ``col[7]`` its values.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.

    Exits the process through ``sys.exit`` when a requested column is not
    present in the CSV.  Progress and errors are written to the ``logfile``
    and ``errfile`` names this function expects to find in scope.
    """
    wanted = [field.upper() for field in reqFields]

    iYear, iMonth = wanted[0], wanted[1]

    take_all = wanted[2] == 'ALL'
    if not take_all:
        reqs = wanted[2:]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')
    cList = df.columns.tolist()

    if take_all:
        reqs = cList[8:]

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')

    # The first five output columns are repeated once per requested column.
    repeated = [df.loc[:, col[n]].tolist() for n in range(5)]

    for req in reqs:
        if req not in cList:
            errfile.write(str(now.now()) + " Requested data " + str(req) + " don't match the csv file. Please check the file at: " + str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            sys.exit("Requested data " + str(req) + " don't match the excel file. Please check the file at: " + url)

        valueList = df.loc[:, req].tolist()
        count = len(valueList)

        for n, values in enumerate(repeated):
            raw_data[col[n]].extend(values)

        name_parts = req.split('_')
        raw_data[col[5]].extend([name_parts[0]] * count)
        raw_data[col[6]].extend([name_parts[1]] * count)
        raw_data[col[7]].extend(valueList)

    total_rows = len(raw_data[col[0]])
    raw_data[col[8]] = [iYear] * total_rows
    raw_data[col[9]] = [iMonth] * total_rows
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
예제 #3
0
파일: ceefax.py 프로젝트: Giannie/KLBFAX
    def standard(self):
        """Cycle through pages until a signal ends the loop.

        Every pass may pick a new page (the lunch page while the hour is
        "12" and the minute is below 20, otherwise a random loaded page)
        once the current page has used up its cycles, then polls
        ``ThreadSignaller.queue`` without blocking.  A ``ShowPage`` or
        ``ShowGreetingPage`` signal replaces the page to show, ``CleanExit``
        stops the weather thread and exits the process, and
        ``InterruptStandardLoop`` leaves the loop.
        """
        ticks = 0
        ticks_to_wait = 0
        next_page = None

        while True:
            if ticks >= ticks_to_wait:
                lunch_window = (now.now().strftime("%H") == "12"
                                and now.now().minute < 20)
                if lunch_window:
                    next_page = page.LunchPage()
                else:
                    next_page = pageFactory.get_loaded_random()

            try:
                signal = ThreadSignaller.queue.get_nowait()
                if isinstance(signal, ThreadSignaller.ShowPage):
                    next_page = pageFactory.get_reloaded_page(signal.page_num)
                elif isinstance(signal, ThreadSignaller.ShowGreetingPage):
                    next_page = get_greeting_page(signal.barcode)
                elif signal == ThreadSignaller.CleanExit:
                    self.weather_thread.stop()
                    sys.exit()
                elif signal == ThreadSignaller.InterruptStandardLoop:
                    break
            except Queue.Empty:
                # Nothing queued on this pass.
                pass

            if next_page:
                ticks_to_wait = self._get_cycles_left(next_page.duration_sec)
                ticks = 0
                next_page.show()
                next_page = None

            # No page is pending at this point (it was either absent or has
            # just been shown and cleared), so every pass counts one tick
            # and sleeps.
            ticks += 1
            time.sleep(config.sleeping_time_ms / 1000.0)
def download(inPath, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Reshape two population-projection CSV files into one JSON document.

    ``inPath[0]`` is labelled "female" and ``inPath[1]`` "male".  In each
    file the first three columns are copied through (positions 0, 1 and 2 go
    to ``col[5]``, ``col[6]`` and ``col[2]``), and every later column is
    treated as one year: its header goes to ``col[4]``, its cells to
    ``col[7]`` and each cell divided by the column's last-row cell to
    ``col[8]``.  After both files are read, ``col[7]`` is multiplied by 1000
    and truncated to int, and "All ages" in ``col[2]`` is rewritten as
    "All Ages".  The records are wrapped as
    ``{"mydata": {"jsondata": [...]}}`` and written to ``outPath``.

    Args:
        inPath: Two CSV paths; the text before the first '-' in the file
            name of ``inPath[0]`` is used as the projection base year.
        outPath: Destination of the JSON file.
        col: Output column names, indexed 0..8 as described above.
        keyCol, digitCheckCol, noDigitRemoveFields: Accepted for signature
            compatibility; not used here.

    Positional access uses ``DataFrame.iloc``; the ``DataFrame.ix`` indexer
    used previously no longer exists in current pandas releases.
    """
    dName = outPath

    genderArray = ["female", "male"]

    iYear = inPath[0].split('/')[-1].split('-')[0]

    iPopID = "ONS-" + iYear + "-based-LAD-Subnational-Population-Projections"
    iPopdescription = "ONS projections (http://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/datasets/localauthoritiesinenglandtable2)"

    # operate this file
    raw_data = {name: [] for name in col}

    for i, gender in enumerate(genderArray):
        inFile = inPath[i]

        # load files
        logfile.write(str(now.now()) + ' ' + inFile + ' file loading\n')
        print(inFile + ' file loading------')

        df = pd.read_csv(inFile, dtype='unicode')
        yearcol = df.columns.tolist()[3:]
        lenyearcol = len(yearcol)
        nrows = df.shape[0]

        for j, year in enumerate(yearcol):
            values = df.iloc[:, j + 3].tolist()
            # The last row of each year column is the denominator for the
            # ratio column; parse it once instead of once per cell.
            total = int(float(df.iloc[nrows - 1, j + 3])) if nrows else None

            raw_data[col[2]].extend(df.iloc[:, 2].tolist())
            raw_data[col[4]].extend([year] * nrows)
            raw_data[col[5]].extend(df.iloc[:, 0].tolist())
            raw_data[col[6]].extend(df.iloc[:, 1].tolist())
            raw_data[col[7]].extend(values)
            raw_data[col[8]].extend(int(float(x)) / total for x in values)

        repeat = nrows * lenyearcol
        raw_data[col[0]].extend([iPopID] * repeat)
        raw_data[col[1]].extend([iPopdescription] * repeat)
        raw_data[col[3]].extend([gender] * repeat)

    raw_data[col[7]] = [int(float(value) * 1000) for value in raw_data[col[7]]]
    raw_data[col[2]] = [label.replace("All ages", "All Ages") for label in raw_data[col[2]]]

    df1 = pd.DataFrame(raw_data)
    strings = df1.to_json(orient="records")

    # Wrap the records so the saved file has the single top-level key
    # 'mydata' holding {'jsondata': [...]}.
    jsonString = '[{"jsondata":' + strings + '}]'

    myJson = pd.read_json(jsonString)
    myJson.index = ['mydata']

    # save to file
    myJson.to_json(path_or_buf=dName, orient="index")
    logfile.write(str(now.now()) + ' has been extracted and saved as ' + str(dName) + '\n')
    print('Requested data has been extracted and saved as ' + dName)
    logfile.write(str(now.now()) + ' finished\n')
    print("finished")
예제 #5
0
파일: greetings.py 프로젝트: Giannie/KLBFAX
 def random(self):
     """Pick a greeting at random, widening the pool by time of day.

     Before hour 12 the morning greetings are added to the general ones,
     after hour 17 the evening greetings are; otherwise only the general
     greetings are used.
     """
     import now
     from random import choice
     if now.now().hour < 12:
         pool = self.greetings + self.morning
     elif now.now().hour > 17:
         pool = self.greetings + self.evening
     else:
         pool = self.greetings
     return choice(pool)
예제 #6
0
파일: ceefax.py 프로젝트: Giannie/KLBFAX
def get_greeting_page(barcode):
    """Build the name page shown for a scanned card.

    When no card file exists under /home/pi/cards/ for ``barcode``, the page
    simply shows the barcode (``large=False``).  Otherwise the owner's name,
    house and twitter handle are read from the card file, morning points are
    added when ``points.should_add_morning_points`` allows it, and the page
    shows the name plus an extra line: the points message, a house-lookup
    error, or nothing.
    """
    namefile_path = "/home/pi/cards/" + barcode
    from page import greetings

    if not isfile(namefile_path):
        return page.NamePage(barcode, large=False)

    name, house, twitter = points.get_name_house(namefile_path)

    extra = ""
    if not house:
        extra = """Error finding your house. Please
                        report to Scroggs."""

    # Greeting text passed along when points are added; empty without a
    # twitter handle.
    deets = "" if twitter is None else greetings.random() + " @"+twitter+"! "

    hour_text = now.now().strftime("%H")

    name_file = points.read_name_file(namefile_path)
    may_add = points.should_add_morning_points(hour_text, house, name_file,
                                               barcode)
    if may_add:
        points_added = points.add_morning_points(hour_text, house, barcode, deets)
        extra = str(points_added) + " points to " + house + "!"

    return page.NamePage(name, extra=extra)
def download(url, reqInfo, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Fetch a CSV laid out in row pairs and flatten it for saving.

    The first data row is scanned for the first cell that looks like an
    area code (``E`` followed by eight digits); that column index marks
    where the paired data starts.  Rows are then consumed two at a time: the
    non-null cells of the first row from that column on go to ``col[5]``,
    those of the second row to ``col[6]``, and the first row's cells 0 and 1
    are repeated alongside them as ``col[0]`` and ``col[1]``.

    Args:
        url: Location handed to ``openurl.openurl``.
        reqInfo: Three values written as constants into ``col[2]``,
            ``col[3]`` and ``col[4]`` (named year, month and sex here).
        outPath: Destination passed on to ``dsave.save``.
        col: Seven output column names, indexed as described above.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.

    Exits the process through ``sys.exit`` when no area code is found in the
    first data row.
    """
    dName = outPath

    iYear = reqInfo[0]
    iMonth = reqInfo[1]
    iSex = reqInfo[2]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')

    # Find the first column whose first-row cell is an area code.  The
    # for/else form is required: after a loop that never breaks, k is the
    # last index (or unbound for a frame without columns), never
    # df.shape[1], so comparing against df.shape[1] could not detect the
    # "not found" case.
    for k in range(df.shape[1]):
        if re.match(r'E\d{8}$', str(df.iloc[0, k])):
            break
    else:
        errfile.write(str(now.now()) + " Cannot find ecode in row " + str(2) + ". Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Cannot find ecode in row " + str(2) + ". Please check the file at: " + url)

    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(0, df.shape[0], 2):
        if str(df.iloc[i, 0]):
            eList = df.iloc[i, k:].dropna().tolist()
            raw_data[col[5]].extend(eList)
            raw_data[col[6]].extend(df.iloc[i + 1, k:].dropna().tolist())
            raw_data[col[0]].extend([df.iloc[i, 0]] * len(eList))
            raw_data[col[1]].extend([df.iloc[i, 1]] * len(eList))

    total_rows = len(raw_data[col[0]])
    raw_data[col[2]] = [iYear] * total_rows
    raw_data[col[3]] = [iMonth] * total_rows
    raw_data[col[4]] = [iSex] * total_rows
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
예제 #8
0
파일: models.py 프로젝트: stefanw/pressley
 def is_stale(self, seconds=None):
     """Tell whether this object should be retrieved again.

     It is stale when it has never been retrieved, or when more than
     ``seconds`` have passed since ``last_retrieved``.  A falsy ``seconds``
     (including the default None) falls back to ``settings.SCRAPE_PERIOD``.
     """
     max_age = seconds or settings.SCRAPE_PERIOD
     last = self.last_retrieved
     if last is None:
         return True
     elapsed = now() - last
     return elapsed.total_seconds() > max_age
예제 #9
0
파일: LunchPage.py 프로젝트: Giannie/KLBFAX
 def __init__(self):
     """Set up the lunch page with its banner, plus a Friday extra.

     The content is the coloured ASCII rendering of "Lunchtime!".  When the
     abbreviated weekday is "Fri", a second banner on a red background is
     appended on a new line.  The page is flagged as loaded at the end.
     """
     super(LunchPage, self).__init__("???")
     self.name = "Lunch"
     self.content = colour_print(printer.text_to_ascii("Lunchtime!"))
     is_friday = now.now().strftime("%a") == "Fri"
     if is_friday:
         self.content += "\n"
         friday_banner = printer.text_to_ascii("It's Fancy Friday!")
         self.content += colour_print(friday_banner, colours.Background.RED)
     self.loaded = True
def download(url, sheet, reqFields, outPath, keyCol, digitCheckCol, noDigitRemoveFields):
    """Fetch an Excel workbook and save the requested columns of one sheet.

    Args:
        url: Location handed to ``openurl.openurl``.
        sheet: Sheet passed to ``ExcelFile.parse``.
        reqFields: Column labels selected from the sheet; also used as the
            output column list.
        outPath: Destination passed on to ``dsave.save``.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.
    """
    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    df = pd.ExcelFile(socket).parse(sheet)

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    selected = df.loc[:, reqFields]

    # save csv file
    dsave.save(selected, reqFields, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
예제 #11
0
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Fetch an Excel sheet and extract one value per area code and year.

    A header row is located first: for each requested year ``k`` the text
    ``"19 in " + k[2:]`` must occur exactly four times in the same row, and
    the column of the fourth occurrence is the one read for that year.  All
    following rows whose first cell is an area code (``E`` plus eight
    digits) contribute one output record per requested year.

    Args:
        url: Location handed to ``openurl.openurl``.
        sheet: Sheet passed to ``ExcelFile.parse``.
        reqFields: Requested years (strings).
        outPath: Destination passed on to ``dsave.save``.
        col: Four output column names: cell 0 of the row, cell 2 of the row,
            the requested year, and the value found in that year's column.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.

    Exits the process through ``sys.exit`` when no row carries the header
    for every requested year.
    """
    yearReq = reqFields
    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Bound up front so that a sheet without rows reaches the error report
    # below, and an empty request reaches the reading loop, instead of
    # failing with a NameError.
    yearCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        yearCol = []
        for k in yearReq:
            k_asked = "19 in " + k[2:]
            hits = [j for j in range(df.shape[1]) if df.iloc[i, j] == k_asked]
            if hits:
                restartIndex = i + 1

            if len(hits) == 4:
                yearCol.append(hits[3])

        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + url)

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, df.shape[0]):
        if re.match(r'E\d{8}$', str(df.iloc[i, 0])):
            # yearCol holds exactly one column per requested year here.
            for year, year_column in zip(yearReq, yearCol):
                raw_data[col[0]].append(df.iloc[i, 0])
                raw_data[col[1]].append(df.iloc[i, 2])
                raw_data[col[2]].append(year)
                raw_data[col[3]].append(df.iloc[i, year_column])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
예제 #12
0
def nodigit(data, check_field, remove_field, logfile):
    """Drop the rows that hold non-numeric values in selected columns.

    Args:
        data: Frame to check; returned untouched when ``check_field`` is
            empty.
        check_field: Columns handed to ``dvalid.checkdigit``.
        remove_field: Columns whose flagged rows are removed; each must be a
            key of the mapping returned by ``dvalid.checkdigit``.
        logfile: Writable object receiving progress lines.

    Returns:
        ``data`` without the flagged rows (or ``data`` itself when nothing
        was flagged).
    """
    if len(check_field) == 0:
        return data

    # check the no digit data
    logfile.write(str(now.now()) + ' check the no digit data\n')
    print('check the no digit data')
    inrow = dvalid.checkdigit(data, check_field, logfile)

    # drop the no digit data
    flagged = []
    for name in remove_field:
        flagged.extend(inrow[name])

    # A row flagged in several columns must be dropped only once.
    flagged = list(set(flagged))

    if flagged:
        data = data.drop(data.index[flagged])
        droppedRow = ','.join(str(x+2) for x in flagged)
        print('------------------------------------')
        logfile.write(str(now.now()) + ' the dropped rows are: row ' + droppedRow + '\n')
        print('the dropped rows are: row ' + droppedRow)

    return data
예제 #13
0
def getapi(date, colFields, logfile, errfile):
    """Build the NM_18_1 data URL for the requested dates and columns.

    The latest available date is read from the API first.  Every requested
    date is then expressed relative to it: ``'latest'`` (any case) or a date
    equal to the latest one becomes ``latest``; any other ``YYYY-MM`` value
    becomes ``latestMINUS<n>`` with ``n`` the month difference.  Duplicates
    are removed and the tokens are ordered with ``nkeys.natural_keys``.

    Args:
        date: Iterable of ``'latest'`` or ``'YYYY-MM'`` strings.
        colFields: Column names; lower-cased and joined into ``select=``.
        logfile, errfile: Passed to ``openurl.openurl``; ``logfile`` also
            receives a progress line.

    Returns:
        The complete request URL as a string.
    """
    url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?date=latest&select=date"

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # load this csv file
    df = pd.read_csv(socket, dtype='unicode')

    # get the latest date
    latestDate = df.iloc[0, 0].split('-')
    logfile.write(str(now.now()) + ' get the latest date\n')
    print('get the latest date------')

    # get the required date string
    tokens = []
    for wanted in date:
        if wanted.lower() == "latest":
            tokens.append('latest')
            continue

        wanted_parts = wanted.split('-')
        nYear = int(latestDate[0]) - int(wanted_parts[0])
        nMonth = int(latestDate[1]) - int(wanted_parts[1])

        if nYear == 0 and nMonth == 0:
            tokens.append('latest')
        else:
            tokens.append('latestMINUS' + str(nMonth + 12 * nYear))
    dateString = ','.join(sorted(set(tokens), key=nkeys.natural_keys))

    # get the required selection string
    colSelect = ','.join([x.lower() for x in colFields])

    # get the required API
    return ''.join([
        'https://www.nomisweb.co.uk/api/v01/dataset/',
        'NM_18_1.data.csv?',
        'geography=1946157199...1946157245&',
        'date=' + dateString + '&',
        'age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206',
        '&select=' + colSelect,
    ])
예제 #14
0
def download(url, outPath, keyCol, digitCheckCol, noDigitRemoveFields, logfile, errfile):
    """Fetch a CSV and hand it, with all of its columns, to ``dsave.save``.

    Args:
        url: Location handed to ``openurl.openurl``.
        outPath: Destination passed on to ``dsave.save``.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.
        logfile, errfile: Writable objects for progress and error lines.
    """
    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # load this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    frame = pd.read_csv(socket, dtype='unicode')
    header = frame.columns.tolist()

    # save csv file
    dsave.save(frame, header, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
예제 #15
0
def checkdigit(data, field, logfile):
    """Find the rows whose value in the given columns is not a plain number.

    A cell passes when, after ``str()`` and ``strip()``, it consists only of
    digits or of two digit groups separated by a single '.'.  Every other
    cell is reported on stdout and in ``logfile`` (with the row number
    offset by 2) and its index label is collected.

    Args:
        data: Frame indexed so that ``data[column][label]`` yields a cell.
        field: Column names to check.
        logfile: Writable object receiving one line per offending cell.

    Returns:
        Dict mapping each checked column to the list of offending index
        labels.
    """
    inrow = {name: [] for name in field}

    for name in field:
        for row in data.index.tolist():
            text = str(data[name][row]).strip()
            if text.isdigit():
                continue

            pieces = text.split('.')
            if len(pieces) == 2 and pieces[0].isdigit() and pieces[1].isdigit():
                continue

            inrow[name].append(row)
            print('------------------------------------')
            logfile.write(str(now.now()) + ' the value is not a digit number at: row ' + str(row+2) + ', col ' + name + '\n')
            print('the value is not a digit number at: row ' + str(row+2) + ', col ' + name)

    return inrow
예제 #16
0
def all_url(kind, rid):
    """Dispatch ``rid`` to the module-level callable named like ``kind``.

    Args:
        kind: Name compared against the supported names listed below.
        rid: Value passed as the only argument to the selected callable.

    Returns:
        Whatever the selected callable returns, or None when ``kind``
        matches none of the supported names.
    """
    supported = (
        "bilibili", "chushou", "douyin", "douyu", "egame", "huajiao",
        "huomao", "huya", "iqiyi", "kuaishou", "kugou", "longzhu", "now",
        "pps", "v6cn", "wangyicc", "xigua", "yingke", "yizhibo", "yy",
        "zhanqi",
    )
    for name in supported:
        if kind == name:
            # Resolved only on a match, so a name that is not defined in
            # this module matters only when that kind is requested.
            return globals()[name](rid)
    return None
예제 #17
0
    def scrape_releases(self, source):
        """Store every entry of a source's feed and queue it for matching.

        For each feed entry that is not a PDF link, the linked content is
        fetched, a ``Release`` row is fetched or created, and (when the body
        is not empty) the text is added to ``self.sfm``.

        Args:
            source: Object providing ``fetch_feed()``, ``organization`` and
                ``doc_type``; also stored on the created rows.

        Raises:
            SourceScrapeFailure: when the superfastmatch client raises or
                reports an unsuccessful result.
        """
        feed = source.fetch_feed()

        for entry in feed['entries']:
            link = entry.get('link')
            if link.lower()[-4:] == ".pdf":
                logging.warning("Skipping PDF link: {0}".format(link))
                continue

            title = entry.get('title')
            # Only feed-supplied text goes through the parser; when the
            # entry carries no date the current time is used directly,
            # since now() is already a datetime and not parseable text.
            stamp = (entry.get('published') or
                     entry.get('updated') or
                     entry.get('a10:updated'))
            date = dateutil.parser.parse(stamp) if stamp else now()
            source_name = source.organization
            body = get_link_content(link)

            (release, created) = Release.objects.get_or_create(url=link,
                                                               title=title,
                                                               date=date,
                                                               body=body,
                                                               source=source)
            if body is None or len(body.strip()) == 0:
                continue

            try:
                result = self.sfm.add(doctype=source.doc_type or settings.DEFAULT_DOCTYPE,
                                      docid=release.id,
                                      text=body,
                                      defer=True,
                                      source=source_name,
                                      date=date,
                                      title=title,
                                      put=False)
            except superfastmatch.SuperFastMatchError as e:
                raise SourceScrapeFailure(source=source, description=unicode(e))

            if result['success'] != True:
                msg = 'Superfastmatch failure: {0}'.format(result.get('error', ''))
                # Pass source by keyword, as at every other call site; a
                # positional argument would not be bound to ``source``.
                raise SourceScrapeFailure(source=source,
                                          description=msg)
예제 #18
0
def save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile):
    """Clean the extracted data and write it to a CSV file.

    Steps, in order: build a frame with columns ``col`` (renamed to title
    case), strip it with ``dclean.stripcsv``, drop rows with non-numeric
    values via ``dclean.nodigit``, remove duplicate rows keeping the last
    occurrence, optionally add a ``pkey`` column computed by
    ``fpkey.fpkey``, and write the result without the index.

    Args:
        raw_data: Data accepted by ``pandas.DataFrame``.
        col: Column names to take from ``raw_data``.
        keyCol: Columns the primary key is built from; empty for no key.
        digitCheckCol: Columns checked for numeric content.
        noDigitRemoveFields: Columns whose non-numeric rows are removed.
        dName: Output path.
        logfile: Writable object receiving progress lines.
    """
    # write csv file
    logfile.write(str(now.now()) + ' writing to file\n')
    print('writing to file ' + dName)
    df = pd.DataFrame(raw_data, columns=col)
    df.columns = [x.title() for x in col]
    col = df.columns.tolist()

    # clean data--remove spaces
    logfile.write(str(now.now()) + ' data cleaning\n')
    print('data cleaning------')
    df = dclean.stripcsv(df, col)

    # remove the cell with no digit data
    check_field = [x.title() for x in digitCheckCol]
    remove_field = [x.title() for x in noDigitRemoveFields]
    df = dclean.nodigit(df, check_field, remove_field, logfile)

    # delete the duplicate data
    logfile.write(str(now.now()) + ' check and delete the duplicate data\n')
    print('check and delete the duplicate data------')
    # keep='last' is the supported spelling of the former take_last=True,
    # which current pandas rejects as an unexpected keyword.
    df = df.drop_duplicates(subset=col, keep='last')

    # create primary key by md5 for each row
    if len(keyCol) != 0:
        logfile.write(str(now.now()) + ' create primary key\n')
        print('create primary key------')
        keyCol = [x.title() for x in keyCol]
        df['pkey'] = fpkey.fpkey(df, keyCol)
        logfile.write(str(now.now()) + ' create primary key end\n')
        print('create primary key end------')
    else:
        print('no primary key------')

    # save to file
    df.to_csv(dName, index=False)
    logfile.write(str(now.now()) + ' has been extracted and saved as ' + str(dName) + '\n')
    print('Requested data has been extracted and saved as ' + dName)
    logfile.write(str(now.now()) + ' finished\n')
    print("finished")
예제 #19
0
def openurl(url, logfile, errfile):
    """Open ``url`` and return the response, exiting the process on failure.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        logfile: Writable object receiving progress lines.
        errfile: Writable object receiving the error description.

    Returns:
        The object returned by ``urllib.request.urlopen``.

    On an HTTP error, a URL error or any other exception, the problem is
    written to both files and ``sys.exit`` is called with a message.
    """
    def abort(err_detail, exit_message):
        # Shared tail of every failure path: report, log, terminate.
        errfile.write(str(now.now()) + err_detail + ' . End progress\n')
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(exit_message)

    try:
        socket = urllib.request.urlopen(url)
        logfile.write(str(now.now()) + ' open url\n')
        print('open url------')
    except urllib.error.HTTPError as e:
        abort(' file download HTTPError is ' + str(e.code),
              'file download HTTPError = ' + str(e.code))
    except urllib.error.URLError as e:
        abort(' file download URLError is ' + str(e.args),
              'file download URLError = ' + str(e.args))
    except Exception:
        print('file download error')
        import traceback
        trace = traceback.format_exc()
        abort(' generic exception: ' + str(trace),
              'generic exception: ' + trace)

    return socket
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol,
             noDigitRemoveFields):
    """Fetch an Excel sheet and extract apprenticeship values by age band.

    Two header rows are located.  First, a row in which the number of cells
    containing a requested year equals the number of requested years; the
    matching columns mark where each year's block starts.  Second, a later
    row holding "All Apprenticeships" inside every year block.  For each
    following row whose cell 1 is set and not "Total", and whose cell in a
    year's "All Apprenticeships" column is set, two records are produced:
    that column as "Under 19" and the next column as "19-24".

    Args:
        url: Location handed to ``openurl.openurl``.
        sheet: Sheet passed to ``ExcelFile.parse``.
        reqFields: Requested years.
        outPath: Destination passed on to ``dsave.save``.
        col: Four output column names: cell 1 of the row, the year, the age
            band and the value.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.

    Exits the process through ``sys.exit`` when either header row cannot be
    found.
    """
    yearReq = reqFields
    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Bound up front so that a sheet without rows reaches the error report
    # below instead of failing with a NameError.
    yearCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        yearCol = []
        for k in yearReq:
            for j in range(df.shape[1]):
                if str(k) in str(df.iloc[i, j]):
                    yearCol.append(j)
                    restartIndex = i + 1

        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') +
            " don't match the excel file. Please check the file at: " +
            str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip('[]') +
                 " don't match the excel file. Please check the file at: " +
                 url)

    # Each year's block runs from its own column up to the next year's
    # column; the sheet width closes the last block.
    bounds = yearCol + [df.shape[1]]

    # Bound up front for the case where no row follows the year header.
    kk = []
    for i in range(restartIndex, df.shape[0]):
        kk = []
        for k in range(len(yearCol)):
            for j in range(bounds[k], bounds[k + 1]):
                if df.iloc[i, j] == "All Apprenticeships":
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(yearReq):
            break

    if len(kk) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') +
            " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: "
            + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(
            "Requested data " + str(yearReq).strip('[]') +
            " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: "
            + url)

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, df.shape[0]):
        # kk holds exactly one column per requested year here.
        for year, k in zip(yearReq, kk):
            if (pd.notnull(df.iloc[i, 1])) and (pd.notnull(
                    df.iloc[i, k])) and (df.iloc[i, 1] != "Total"):
                for offset, age_band in enumerate(["Under 19", "19-24"]):
                    raw_data[col[0]].append(df.iloc[i, 1])
                    raw_data[col[1]].append(year)
                    raw_data[col[2]].append(age_band)
                    raw_data[col[3]].append(df.iloc[i, k + offset])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields,
               dName, logfile)
예제 #21
0
        'X-Requested-With': 'XMLHttpRequest'
    }
    res = requests.get(url, headers=headers)
    return res


if __name__ == '__main__':
    # Work through the URLs listed in urllist.txt, saving the wallpaper image
    # found on each page into 4k/ and rewriting the list after every step so
    # an interrupted run can resume.
    #
    # Line endings and blank lines are dropped while reading: the list is
    # written back with an explicit '\n' per entry, so keeping the original
    # newline would request URLs ending in '\n' and fill the file with empty
    # lines.
    with open("urllist.txt", 'r') as f:
        urllist = [line.strip() for line in f if line.strip()]
    while urllist:
        try:
            url = urllist.pop(0)
            res = get_res(url)
            html = etree.HTML(res.text)
            # Look up the wallpaper image address on the page
            imgurl = html.xpath("//div[@class='single-wallpaper']/img/@src")
            if imgurl:
                filename = imgurl[0].split('/')[-1]
                tmp_img = get_res(imgurl[0])
                with open('4k/' + filename, 'wb') as im:
                    im.write(tmp_img.content)
                # Reported only when something was written; a page without
                # an image is not an error.
                print(now.now(), "写入了:", filename)
            with open('urllist.txt', 'w') as p:
                for i in urllist:
                    p.write(i + '\n')
            time.sleep(1)
        except Exception:
            # Best effort: persist what is left, wait, and carry on with the
            # next URL.
            with open('urllist.txt', 'w') as p:
                for i in urllist:
                    p.write(i + '\n')
            print('出错了,休息3s继续')
            time.sleep(3)
예제 #22
0
파일: Page.py 프로젝트: Giannie/KLBFAX
 def now(self):
     """Return the value produced by the module-level ``now.now()``."""
     current = now.now()
     return current
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Fetch an Excel sheet and extract apprenticeship values by age band.

    Two header rows are located.  First, a row in which the number of cells
    containing a requested year equals the number of requested years; the
    matching columns mark where each year's block starts.  Second, a later
    row holding "All Apprenticeships" inside every year block.  For each
    following row whose cell 1 is set and not "Total", and whose cell in a
    year's "All Apprenticeships" column is set, two records are produced:
    that column as "Under 19" and the next column as "19-24".

    Args:
        url: Location handed to ``openurl.openurl``.
        sheet: Sheet passed to ``ExcelFile.parse``.
        reqFields: Requested years.
        outPath: Destination passed on to ``dsave.save``.
        col: Four output column names: cell 1 of the row, the year, the age
            band and the value.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded to
            ``dsave.save`` unchanged.

    Exits the process through ``sys.exit`` when either header row cannot be
    found.
    """
    yearReq = reqFields
    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Bound up front so that a sheet without rows reaches the error report
    # below instead of failing with a NameError.
    yearCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        yearCol = []
        for k in yearReq:
            for j in range(df.shape[1]):
                if str(k) in str(df.iloc[i, j]):
                    yearCol.append(j)
                    restartIndex = i + 1

        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + url)

    # Each year's block runs from its own column up to the next year's
    # column; the sheet width closes the last block.
    bounds = yearCol + [df.shape[1]]

    # Bound up front for the case where no row follows the year header.
    kk = []
    for i in range(restartIndex, df.shape[0]):
        kk = []
        for k in range(len(yearCol)):
            for j in range(bounds[k], bounds[k+1]):
                if df.iloc[i, j] == "All Apprenticeships":
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(yearReq):
            break

    if len(kk) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip(
            '[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip(
            '[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + url)

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, df.shape[0]):
        # kk holds exactly one column per requested year here.
        for year, k in zip(yearReq, kk):
            if (pd.notnull(df.iloc[i, 1])) and (pd.notnull(df.iloc[i, k])) and (df.iloc[i, 1] != "Total"):
                for offset, age_band in enumerate(["Under 19", "19-24"]):
                    raw_data[col[0]].append(df.iloc[i, 1])
                    raw_data[col[1]].append(year)
                    raw_data[col[2]].append(age_band)
                    raw_data[col[3]].append(df.iloc[i, k+offset])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def download(url, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Flatten every sheet of the Excel workbook at ``url`` and save the result.

    In each sheet the first cell made of exactly two words, one of them
    'Aged' (for example 'Aged 10-14'), marks the header row.  The cells from
    that one up to, but not including, the last column are the age bands.
    Every row below the header yields one output record per age band.

    Output columns are filled as follows:
        col[0], col[1], col[2], col[4] -- sheet columns 0, 1, 2 and 3
        col[3] -- the sheet name
        col[5] -- second word of the age-band header
        col[6] -- the cell value for that age band
        col[7] -- first four characters of the last path component of ``url``
        col[8] -- the constant "HCC_SAPF_2015"

    Args:
        url: Location of the workbook, passed straight to ``pd.ExcelFile``.
        outPath: Output name handed to ``dsave.save``.
        col: Sequence of the nine output column names.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded unchanged to
            ``dsave.save``.

    Exits through ``sys.exit`` (after writing to the module-level ``errfile``
    and ``logfile``) when a sheet has no 'Aged ...' header cell.
    """
    dName = outPath

    # The first four characters of the file name are recorded with every row.
    pDate = url.split('/')[-1][:4]

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(url)

    raw_data = {name: [] for name in col}

    for sheet in xd.sheet_names:
        df = xd.parse(sheet)
        nRows, nCols = df.shape

        logfile.write(str(now.now()) + ' for sheet ' + str(sheet) + '------\n')
        logfile.write(str(now.now()) + ' indicator checking\n')
        print('for sheet ' + str(sheet) + ' ------')
        print('indicator checking------')

        # indicator checking: find the first 'Aged <band>' cell.
        # .iloc[i, j] is used instead of .iloc[i][j] so that the lookup is
        # always positional, whatever the column labels are.
        headerPos = None
        for i in range(nRows):
            for j in range(nCols):
                words = str(df.iloc[i, j]).split()
                if len(words) == 2 and 'Aged' in words:
                    headerPos = (i, j)
                    break
            if headerPos is not None:
                break

        if headerPos is None:
            errfile.write(str(now.now()) + " The sheet " + str(sheet) + " has not required fields, such as 'Aged 10-14'. Please check the file at: " + str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            sys.exit("The sheet " + str(sheet) + " has not required fields, such as 'Aged 10-14'. Please check the file at: " + url)

        headerRow, firstAgeCol = headerPos
        ageReq = df.iloc[headerRow, firstAgeCol:-1].tolist()
        nAges = len(ageReq)
        restartIndex = headerRow + 1

        # data reading
        logfile.write(str(now.now()) + ' data reading\n')
        print('data reading------')
        for i in range(restartIndex, nRows):
            # NOTE(review): str() of an empty (NaN) cell is 'nan', which is
            # truthy, so this check does not appear to skip blank rows --
            # confirm whether pd.notnull() was intended.
            if str(df.iloc[i, 0]):
                raw_data[col[0]].extend([df.iloc[i, 0]] * nAges)
                raw_data[col[1]].extend([df.iloc[i, 1]] * nAges)
                raw_data[col[2]].extend([df.iloc[i, 2]] * nAges)
                # The sheet name is added per row so that every column stays
                # the same length even if the check above rejects a row.
                raw_data[col[3]].extend([sheet] * nAges)
                raw_data[col[4]].extend([df.iloc[i, 3]] * nAges)
                raw_data[col[5]].extend(k.split()[1] for k in ageReq)
                raw_data[col[6]].extend(df.iloc[i, firstAgeCol:-1].tolist())

    nRecords = len(raw_data[col[0]])
    raw_data[col[7]] = [pDate] * nRecords
    raw_data[col[8]] = ["HCC_SAPF_2015"] * nRecords
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
args = parser.parse_args()

if args.generateConfig:
    # Template configuration written out on request; the script then stops.
    obj = {
        "url": "http://www.hscic.gov.uk/catalogue/PUB13365/gp-reg-patients-01-2014.csv",
        "outPath": "tempGpPatientsAge.csv",
        #"reqFields": ["2014", "01", "MALE_0-4", "MALE_5-9", "MALE_10-14", "FEMALE_0-4", "FEMALE_5-9", "FEMALE_10-14"]
        "reqFields": ["2014", "01", "all"], #"all" means all "male" and "female" fields
        "colFields": ['GP_PRACTICE_CODE', 'POSTCODE', 'CCG_CODE', 'NHSE_AREA_TEAM_CODE', 'NHSE_REGION_CODE', 'Sex', 'Age', 'Value', 'Year',  'Month'],
        "primaryKeyCol": ['GP_PRACTICE_CODE', 'CCG_CODE', 'NHSE_AREA_TEAM_CODE', 'NHSE_REGION_CODE', 'Sex', 'Age', 'Year',  'Month'],#[0, 2, 3, 4, 5, 6, 8, 9],
        "digitCheckCol": ['Value'],#[7],
        "noDigitRemoveFields": []
    }

    logfile = open("log_tempGpPatientsAge.log", "w")
    logfile.write(str(now.now()) + ' start\n')

    # Opening in "w" mode creates (or empties) the error file for this run.
    errfile = open("err_tempGpPatientsAge.err", "w")

    with open("config_tempGpPatientsAge.json", "w") as outfile:
        json.dump(obj, outfile, indent=4)
    logfile.write(str(now.now()) + ' config file generated and end\n')

    # Close the handles explicitly instead of relying on interpreter shutdown.
    logfile.close()
    errfile.close()
    sys.exit("config file generated")

# Fall back to the generated template when no config file was given.
if args.configFile is None:
    args.configFile = "config_tempGpPatientsAge.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

    # The log file is named after the part of outPath before its first '.'.
    logfile = open('log_' + oConfig["outPath"].split('.')[0] + '.log', "w")
args = parser.parse_args()

if args.generateConfig:
    # Template configuration written out on request; the script then stops.
    obj = {
        "url": "https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/417835/SFR10_2015_Local_authority_tables.xlsx",
        "outPath": "tempPerTru.csv",
        "sheet": "Table_11_1",
        "reqFields": ["State-funded primary, secondary and special schools (5)"],
        "colFields": ["ecode", "name", "year", "value"],
        "primaryKeyCol": ["ecode", "year"],  # [0, 2],
        "digitCheckCol": ["value"],  # [3],
        "noDigitRemoveFields": [],
    }

    logfile = open("log_tempPerTru.log", "w")
    logfile.write(str(now.now()) + " start\n")

    # Opening in "w" mode creates (or empties) the error file for this run.
    errfile = open("err_tempPerTru.err", "w")

    with open("config_tempPerTru.json", "w") as outfile:
        json.dump(obj, outfile, indent=4)
    logfile.write(str(now.now()) + " config file generated and end\n")

    # Close the handles explicitly instead of relying on interpreter shutdown.
    logfile.close()
    errfile.close()
    sys.exit("config file generated")

# Fall back to the generated template when no config file was given.
if args.configFile is None:
    args.configFile = "config_tempPerTru.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

    # The log file is named after the part of outPath before its first '.'.
    logfile = open("log_" + oConfig["outPath"].split(".")[0] + ".log", "w")
예제 #27
0
파일: 400.py 프로젝트: Giannie/KLBFAX
    def generate_content(self,debug=False):
        """Render today's date as a banner above an analogue clock face.

        The clock outline comes from ``clock.clock`` (a grid of truthy/falsy
        cells).  The hour and minute hands are plotted onto grids of the
        same shape, the three grids are merged, and each pair of grid rows
        is collapsed into one text row of half-block characters.  The
        result is stored in ``self.content`` and the tagline in
        ``self.tagline``.

        ``debug`` is accepted for interface compatibility and is not used.
        """
        from clock import clock

        # Background colour name for the date banner, keyed by strftime("%a").
        weekday_backgrounds = {
            "Mon": "RED", "Tue": "YELLOW", "Wed": "CYAN", "Thu": "GREEN",
            "Fri": "MAGENTA", "Sat": "BLUE", "Sun": "RED",
        }

        circle_radius = 19
        screen_radius = 19
        d = .3
        num_points = 25

        def draw_hand(grid, angle, reach):
            # Mark the cells under a hand of length reach*circle_radius that
            # points in direction `angle`.  Each sample point is smeared by
            # +/- d in both axes so the hand has no gaps.
            for a in range(0, num_points + 1):
                r = circle_radius*a*reach/num_points
                px = r*cos(angle)
                py = r*sin(angle)
                for dx in (-d, d):
                    for dy in (-d, d):
                        column = screen_radius + int(floor(px+.5+dx))
                        row = screen_radius - int(floor(py+.5+dy))
                        grid[row][column] = True

        # Take one snapshot of the time so that minute, hour, weekday and
        # banner all describe the same instant.
        current = now()
        current_minute = float(current.strftime("%M"))
        current_hour = float(current.strftime("%I"))
        current_weekday = current.strftime("%a")

        bgcolor = getattr(self.colours.Background, weekday_backgrounds[current_weekday])
        if current_weekday != "Sun":
            # Every day except Sunday gets a blinking banner.
            bgcolor = self.colours.Style.BLINK + bgcolor
        content = colour_print(printer.text_to_ascii(current.strftime("%A %-d %b")),background=bgcolor,foreground=self.colours.Foreground.BLACK)
        content += "\n"

        minute = [[False]*len(i) for i in clock]
        hour   = [[False]*len(i) for i in clock]

        # The hour hand also advances with the minutes; 12 o'clock is pi/2.
        current_hourtopointat = current_hour + current_minute/60.
        draw_hand(hour, pi/2 - current_hourtopointat*2*pi/12, .5)
        draw_hand(minute, pi/2 - current_minute*2*pi/60, .8)

        # Merge outline and hands into one grid of booleans.
        size = 2*screen_radius + 1
        lit = [[bool(clock[y][x] or minute[y][x] or hour[y][x]) for x in range(size)]
               for y in range(size)]
        # The grid has an odd number of rows; pad with a blank one so the
        # last row still has a lower partner.
        lit.append([False]*size)

        # (upper cell lit, lower cell lit) -> character covering both cells.
        half_blocks = {
            (False, False): " ",
            (True, True): u"\u2588",
            (True, False): u"\u2580",
            (False, True): u"\u2584",
        }
        margin = " "*(screen_radius+1)
        rows = []
        for y in range(0, size, 2):
            cells = "".join(half_blocks[(upper, lower)]
                            for upper, lower in zip(lit[y], lit[y+1]))
            rows.append(margin + cells)
        content += "\n".join(rows)

        self.content = content
        self.tagline = "KLB Mean Time"
예제 #28
0
parser.add_argument("--configFile", "-c", help="path for config file")
args = parser.parse_args()

if args.generateConfig:
    # Template configuration written out on request; the script then stops.
    obj = {
        #"url": "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?geography=1946157199...1946157245&date=latest&age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206&select=geography_code,geography_name,sex_name,age_name,duration_name,measures_name,obs_value,date",
        "outPath": "tempYouthUnemployment.csv",
        "date": ["Latest", "2015-07", "2015-04", "2015-09", "2014-03", "2012-05"],
        "colFields": ["Geography_code", "geography_name", "sex_name", "Age_name", "duration_name", "Measures_name", "Obs_value", "Date"],
        "primaryKeyCol": ["Geography_code", "sex_name", "Age_name", "duration_name", "Measures_name", "Date"],#[0, 2, 3, 4, 5, 7],
        "digitCheckCol": ["Obs_value"],#[6],
        "noDigitRemoveFields": ["Obs_value"],#[6]
    }

    logfile = open("log_tempYouthUnemployment.log", "w")
    logfile.write(str(now.now()) + ' start\n')

    # Opening in "w" mode creates (or empties) the error file for this run.
    errfile = open("err_tempYouthUnemployment.err", "w")

    with open("config_tempYouthUnemployment.json", "w") as outfile:
        json.dump(obj, outfile, indent=4)
    logfile.write(str(now.now()) + ' config file generated and end\n')

    # Close the handles explicitly instead of relying on interpreter shutdown.
    logfile.close()
    errfile.close()
    sys.exit("config file generated")

# Fall back to the generated template when no config file was given.
if args.configFile is None:
    args.configFile = "config_tempYouthUnemployment.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

    # The log file is named after the part of outPath before its first '.'.
    logfile = open('log_' + oConfig["outPath"].split('.')[0] + '.log', "w")
예제 #29
0
파일: 107.py 프로젝트: Giannie/KLBFAX
    def generate_content(self):
        """Build the "Muirhead Room" page and store it in ``self.content``.

        Bookings are downloaded as JSON; each entry is used as
        ``[start, end, name]`` with start and end parsed by dateutil.
        Bookings that have already ended are dropped.  The page shows a
        coloured status banner (green: free, yellow: free but booked within
        the next hour, red: busy) followed by the remaining bookings grouped
        by day.
        """
        import urllib2
        import now
        import json
        from contextlib import closing
        from dateutil import parser

        def friendly_date(date):
            if date.date() == datetime.today().date():
                return "Today"
            elif date.date() == datetime.today().date() + timedelta(days=1):
                return "Tomorrow"
            else:
                return date.strftime("%A %-d")

        content = colour_print(printer.text_to_ascii("Muirhead Room"))

        # closing() makes sure the HTTP response is released after parsing.
        with closing(urllib2.urlopen("http://www.mscroggs.co.uk/room_list.json")) as response:
            events = json.load(response)

        # Compare as naive datetimes.
        # NOTE(review): this assumes the feed's timestamps parse to naive
        # datetimes as well -- confirm against the feed.
        current = now.now().replace(tzinfo=None)
        for e in events:
            e[0] = parser.parse(e[0])
            e[1] = parser.parse(e[1])
        events = [e for e in events if e[1] > current]

        occupied = False
        for event in events:
            if event[0] < current and event[1] > current:
                occupied = True

        colours_end = self.colours.Foreground.DEFAULT + self.colours.Background.DEFAULT

        if occupied:
            colours_start = self.colours.Background.RED + self.colours.Foreground.WHITE
            # Follow back-to-back bookings (each starting exactly when the
            # previous one ends) to find when the room is next free.  The
            # bounds check covers a chain that runs to the last booking.
            # NOTE(review): assumes the feed is ordered by start time, so
            # that events[0] is the booking in progress -- confirm.
            i = 0
            while i + 1 < len(events) and events[i+1][0] == events[i][1]:
                i += 1
            message = "Busy until " + events[i][1].strftime("%H:%M")
        else:
            # No remaining bookings at all is treated like "nothing today".
            next_occupied = events[0][0] if events else None
            if next_occupied is None or next_occupied.date() != current.date():
                message = "Free all day"
            else:
                message = "Free until " + next_occupied.strftime("%H:%M")
            # Warn in yellow when the next booking starts within the hour.
            # Full datetimes are compared: subtracting .date() values only
            # yields whole days, which made every booking later today
            # count as "within an hour".
            if next_occupied is not None and next_occupied - current <= timedelta(hours=1):
                colours_start = self.colours.Background.YELLOW + self.colours.Foreground.BLACK
            else:
                colours_start = self.colours.Background.GREEN + self.colours.Foreground.BLACK

        # Centre the message by padding both sides.
        left_banner = " "*int((screen.WIDTH - len(message))/2)
        right_banner = " "*int(round((screen.WIDTH - len(message))/2))

        content += "\n\n"
        content += colours_start + left_banner + message + right_banner + colours_end + "\n"

        previous_date = None
        for event in events:
            start_time = event[0]
            end_time = event[1]
            name = event[2]

            # Print a day heading whenever the day changes.
            if end_time.date() != previous_date:
                content += "\n  "+self.colours.Foreground.GREEN
                content += friendly_date(end_time)
                content += self.colours.Foreground.DEFAULT + "\n"
            content += self.colours.Foreground.RED

            content += start_time.strftime("%H:%M") + "-" + end_time.strftime("%H:%M") + " "
            content += self.colours.Foreground.DEFAULT
            content += name
            content += "\n"
            previous_date = end_time.date()

        self.content = content
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract the persistent-absentee percentages from one sheet and save them.

    The sheet is searched for the first row in which the requested field
    appears in as many cells as there are requested fields.  From the
    following rows, the first one holding
    'Percentage of persistent absentees (4)' at or to the right of that
    column gives the value column.  Every later row whose second column
    looks like an area code ('E' followed by eight digits) yields a record.

    Output columns: col[0] = sheet column 1 (code), col[1] = sheet column 3,
    col[2] = the text before the first comma in cell (2, 0), col[3] = value.

    Args:
        url: Workbook location, opened through ``openurl.openurl``.
        sheet: Name of the sheet to parse.
        reqFields: Must contain exactly one field name.
        outPath: Output name handed to ``dsave.save``.
        col: Sequence of the four output column names.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded unchanged to
            ``dsave.save``.

    Every failure is written to the module-level ``errfile`` and ``logfile``
    and then ends the program through ``sys.exit``.
    """
    schoolReq = reqFields

    def abort(detail):
        # Report the problem to both log files, then stop the program.
        message = "Requested data " + str(schoolReq).strip("[]") + detail
        errfile.write(str(now.now()) + " " + message + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + " error and end progress\n")
        sys.exit(message + url)

    if len(schoolReq) != 1:
        abort(" don't match the excel file. This code is only for extracting data from filed 'State-funded primary, secondary and special schools (5)' with 'Percentage of persistent absentees (4)'. Please check the file at: ")

    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    nRows, nCols = df.shape

    iYear = (df.iloc[2, 0].split(","))[0]

    # indicator checking
    logfile.write(str(now.now()) + " indicator checking\n")
    print("indicator checking------")
    # Initialised up front so an empty sheet reaches the error report below
    # instead of failing on an unbound name.
    numCol = []
    restartIndex = 0
    for i in range(nRows):
        numCol = []
        for k_asked in schoolReq:
            for j in range(nCols):
                if str(k_asked) in str(df.iloc[i, j]):
                    numCol.append(j)
                    restartIndex = i + 1

        if len(numCol) == len(schoolReq):
            break

    if len(numCol) != len(schoolReq):
        abort(" don't match the excel file. Please check the file at: ")

    # Each requested field owns the columns from its own position up to the
    # next field's position (or the end of the sheet).
    bounds = numCol + [nCols]

    k_asked = "Percentage of persistent absentees (4)"
    kk = []
    for i in range(restartIndex, nRows):
        kk = []
        for first, last in zip(bounds, bounds[1:]):
            for j in range(first, last):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(schoolReq):
            break

    if len(kk) != len(schoolReq):
        abort(" in the field 'Percentage of persistent absentees (4)' don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    codePattern = re.compile(r"E\d{8}$")
    for i in range(restartIndex, nRows):
        if not codePattern.match(str(df.iloc[i, 1])):
            continue
        for k in kk:
            raw_data[col[0]].append(df.iloc[i, 1])
            raw_data[col[1]].append(df.iloc[i, 3])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + " data reading end\n")
    print("data reading end------")

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract one requested column from a sheet and save it with year and quarter.

    The sheet is searched for the first row in which the requested field
    appears in as many cells as there are requested fields; that fixes the
    value column.  Every later row whose first index level looks like an
    area code ('E' followed by eight digits) yields a record.

    Year and quarter come from the file name: the text between the last '_'
    in ``url`` and the following '.' is read as four year characters
    followed by a month number, and the quarter is that month divided by
    three, rounded down.

    Output columns: col[0], col[1] = first and second entry of the row's
    index value, col[2] = year, col[3] = quarter, col[4] = value.

    Args:
        url: Workbook location, opened through ``openurl.openurl``.
        sheet: Name of the sheet to parse.
        reqFields: Must contain exactly one field name.
        outPath: Output name handed to ``dsave.save``.
        col: Sequence of the five output column names.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded unchanged to
            ``dsave.save``.

    Every failure is written to the module-level ``errfile`` and ``logfile``
    and then ends the program through ``sys.exit``.
    """
    homeReq = reqFields

    def abort(detail):
        # Report the problem to both log files, then stop the program.
        message = "Requested data " + str(homeReq).strip('[]') + detail
        errfile.write(str(now.now()) + " " + message + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(message + url)

    if len(homeReq) != 1:
        abort(" don't match the excel file. This code is only for extracting data from filed 'e1b1a'. Please check the file at: ")

    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    nRows, nCols = df.shape

    # find year and quarter
    iYQ = url.split('_')[-1].split('.')[0]
    iYear = iYQ[:4]
    iQuarter = str(int(iYQ[4:]) // 3)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Initialised up front so an empty sheet reaches the error report below
    # instead of failing on an unbound name.
    numCol = []
    restartIndex = 0
    for i in range(nRows):
        numCol = []
        for k in homeReq:
            for j in range(nCols):
                # .iloc[i, j] keeps the lookup positional whatever the
                # column labels are.
                if df.iloc[i, j] == k:
                    numCol.append(j)
                    restartIndex = i + 1

        if len(numCol) == len(homeReq):
            break

    if len(numCol) != len(homeReq):
        abort(" don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    codePattern = re.compile(r'E\d{8}$')
    for i in range(restartIndex, nRows):
        # NOTE(review): indexing df.index[i] implies the parsed sheet has a
        # multi-level row index (code, name) -- confirm against the workbook.
        rowKey = df.index[i]
        if not codePattern.match(str(rowKey[0])):
            continue
        for k in numCol:
            raw_data[col[0]].append(rowKey[0])
            raw_data[col[1]].append(rowKey[1])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(iQuarter)
            raw_data[col[4]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)