Esempio n. 1
0
    def downloadZIP(self):
        """Download every ZIP archive that is not already present locally.

        The link lists are taken from a fresh ``SourceParser`` after calling
        its ``getALLFormats()``. Each list is stored in its own sub-directory
        of ``<current working directory>/ZIP_G``; the local file name is the
        base name of the link. A link whose target path already exists is
        skipped, so the method can be re-run to fetch only missing files.

        Progress and the total elapsed time are printed to stdout.
        """
        print('- Starting download ZIP files.')
        st = time.time()
        sp = SourceParser.SourceParser()

        sp.getALLFormats()
        # (sub-directory, links) pairs, processed in this order. The trailing
        # numbers are carried over from the original inline comments.
        targets = (
            ('XML4', sp.links_G_XML4),  # 396 412
            ('XML2', sp.links_G_XML2),  # 157
            ('XML24', sp.links_G_XML2_4),  # 52
            ('APS', sp.links_G_APS),  # 252 final 26
        )
        base_dir = os.getcwd() + '/ZIP_G/'
        for sub_dir, links in targets:
            target_dir = (base_dir + sub_dir).replace('\\', '/')
            # urlretrieve cannot write into a directory that does not exist,
            # so create it up front instead of failing on the first link.
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            for dLink in links:
                exist_path = (base_dir + sub_dir + '/' +
                              os.path.basename(dLink)).replace('\\', '/')
                if os.path.exists(exist_path):
                    continue  # already downloaded on an earlier run
                urllib.urlretrieve(dLink, exist_path)
                # Drop urllib's temporary files/cache after each download.
                urllib.urlcleanup()
        print('[Downloaded ZIP files. Cost: {0}]'.format(time.time() - st))
Esempio n. 2
0
def newfuncXML2(filePath):
    """Parse one XML2 file, write its CSV output and log the outcome.

    The file content is obtained through
    ``SourceParser.getXML2Content_DPL(filePath)``, handed to
    ``GrantsParser.extractXML2`` and written with ``GrantsParser.writeCSV``.
    On success ``"<file name>, Processed"`` is written to the log at
    ``log.logPath_G``; if any step raises, the exception is printed and
    ``"<file name>, Failed"`` is logged instead.

    Args:
        filePath: Path of the file to process; only its base name is logged.
    """
    # Create the logger and file name *before* the try block: the failure
    # handler below needs both, and binding them inside the try would turn
    # any early error into an UnboundLocalError that hides the real cause.
    log = LogProcessor.LogProcess()
    fileName = os.path.basename(filePath)
    try:
        sp = SourceParser.SourceParser()
        xmlStr = sp.getXML2Content_DPL(filePath)
        g = GrantsParser()
        g.extractXML2(xmlStr)
        g.writeCSV()
        log.write(log.logPath_G, fileName + ', Processed')
    except Exception as e:
        # Best-effort processing: report the error and record the failure
        # so the file can be identified and retried later.
        print(e)
        log.write(log.logPath_G, fileName + ', Failed')
Esempio n. 3
0
    def checkAll(self):
        """Determine which G and P files have not been processed yet.

        Reads the logs at ``self.logPath_G`` and ``self.logPath_P``. Each log
        line is split on tabs: field 4 is the status and field 1 the file
        name. File names whose status is ``Processed`` (or, for the G log
        only, ``Passed``) are appended once to ``self.sprocessedG`` /
        ``self.sprocessedP``.

        Attributes set: ``processedG``, ``processedP`` (raw log lines),
        ``allG``, ``allP``, ``allG_XML4``, ``allP_XML4`` (from
        ``SourceParser``), and ``unprocessedG`` / ``unprocessedP`` (XML4 file
        names that are not in the corresponding processed list; order is
        unspecified because they are computed via set difference).

        PAIR handling is disabled: the PAIR log is no longer opened here. The
        disabled code took the file name from field 2 of ``Processed`` lines
        and obtained all links from ``sp.getdLinksPAIR()``.
        """
        # NOTE(review): the instance is not used below; the call is kept in
        # case the constructor has side effects -- confirm and remove.
        LogProcessor.LogProcess()

        # Context managers guarantee the log handles are closed.
        with open(self.logPath_G, 'rb') as f_log_g, \
                open(self.logPath_P, 'rb') as f_log_p:
            self.processedG = f_log_g.readlines()
            self.processedP = f_log_p.readlines()

        # (raw lines, list to extend, statuses that count as done)
        jobs = (
            (self.processedG, self.sprocessedG, ('Processed', 'Passed')),
            (self.processedP, self.sprocessedP, ('Processed', )),
        )
        for lines, done, accepted in jobs:
            # Set mirror of `done` for O(1) duplicate checks; `done` itself
            # stays a list so insertion order is preserved.
            seen = set(done)
            for line in lines:
                fields = line.split('\t')
                if len(fields) < 5:
                    continue  # blank or malformed line: no status field
                if fields[4].strip() not in accepted:
                    continue
                fileName = fields[1]
                if fileName not in seen:
                    seen.add(fileName)
                    done.append(fileName)

        sp = SourceParser.SourceParser()
        self.allG = sp.getdLinksPG()
        self.allP = sp.getdLinksPP()

        self.allG_XML4 = sp.getFileNamesPG_XML4()
        self.allP_XML4 = sp.getFileNamesPP_XML4()

        self.unprocessedG = list(set(self.allG_XML4) - set(self.sprocessedG))
        self.unprocessedP = list(set(self.allP_XML4) - set(self.sprocessedP))
Esempio n. 4
0
        bList[9 * m / n:10 * m / n]
    ]
    return sList


def multiProcess(allLinkList, numStep):
    """Process ``allLinkList`` in batches, running each batch in parallel.

    The list is consumed ``numStep`` links at a time. Every batch is handed
    to ``partTen`` and one ``multiprocessing.Process`` running
    ``mainProcess`` is created per sub-list it returns. All processes of a
    batch are started and then joined before the next batch begins, and the
    elapsed time of the batch is printed.

    Args:
        allLinkList: Sequence of links to process.
        numStep: Number of links per batch.
    """
    for offset in range(0, len(allLinkList), numStep):
        started = time.time()
        batch = allLinkList[offset:offset + numStep]
        workers = [
            multiprocessing.Process(target=mainProcess, args=(part, ))
            for part in partTen(batch)
        ]
        # Start everything first, then wait, so the workers run concurrently.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        elapsed = time.time() - started
        print('[multiProcess finished! Time:{time}]'.format(time=elapsed))


if __name__ == "__main__":
    # Script entry point: fetch all PAIR links from SourceParser, process
    # them in batches of 1000 via multiProcess, and report the total time.
    st = time.time()
    sp = SourceParser.SourceParser()
    allLinkList = sp.getdLinksPAIR()
    multiProcess(allLinkList, numStep=1000)

    elapsed = time.time() - st
    print('All PAIR data have been POPULATED successfully! Cost time:{0}'
          .format(elapsed))