def setUpModule():
        ''' Download the INTACT and BIOPLEX source files before the module's tests run.

        For each of the two sections in ``download.ini`` the URL is built by
        concatenating the section's ``location`` and ``files`` entries. INTACT is
        fetched with FTPDownload and BIOPLEX with HTTPDownload, both into
        TEST_DATA_DIR. The status returned by each download is only logged; it is
        not checked here.
        '''
        config = IniParser().read_ini("download.ini")

        # Download intact file from source
        section_intact = config["INTACT"]
        file_url = section_intact['location'] + section_intact['files']
        status = FTPDownload.download(file_url, TEST_DATA_DIR, INTACT)
        # warning() replaces the deprecated warn() alias; %s formats status via str().
        logger.warning("Download status for intact %s", status)

        # Download bioplex file from source
        section_bioplex = config["BIOPLEX"]
        file_url = section_bioplex['location'] + section_bioplex['files']
        status = HTTPDownload.download(file_url, TEST_DATA_DIR, BIOPLEX)
        logger.warning("Download status for bioplex %s", status)
    def check_intact_data(self, child_doc, parent_doc):
        ''' Compare the interactors stored on child_doc with those in the IntAct source file.

        Asserts that child_doc has interaction_source 'intact' and that its
        parent() equals parent_doc.doc_id(). The file configured in the INTACT
        section of download.ini is then downloaded as /tmp/intact.zip. Every line
        of its intact.txt member that contains the parent id is scanned for ENSG
        identifiers, and the number of distinct identifiers found must equal the
        number of stored interactors plus one (the parent id itself).

        If the download reports a falsy status, or the archive has no intact.txt
        member, the size comparison is skipped without failing.

        child_doc  -- document exposing interaction_source, interactors and parent().
        parent_doc -- document exposing doc_id().
        '''
        config = IniParser().read_ini("download.ini")
        self.assertEqual(child_doc.interaction_source, 'intact', 'interaction_source is intact')

        # Interactors already stored in our pipeline, plus the parent id itself
        pydgin_interactors = [interactor['interactor'] for interactor in child_doc.interactors]
        parent_id = parent_doc.doc_id()
        pydgin_interactors.append(parent_id)

        self.assertEqual(parent_id, child_doc.parent(), 'Parent id ok')

        # Download intact file from source and search for the parent entrez id interactors
        section_intact = config["INTACT"]
        file_url = section_intact['location'] + section_intact['files']
        status = FTPDownload.download(file_url, '/tmp', 'intact.zip')
        if status:
            # Context manager so the archive handle is closed even if extraction raises.
            with zipfile.ZipFile('/tmp/intact.zip', 'r') as zf:
                target_path = None
                if 'intact.txt' in zf.namelist():
                    target_path = zf.extract(member='intact.txt', path='/tmp')

            if target_path is not None:
                # Compiled once; the pattern is applied to every matching line.
                ensg_pattern = re.compile(r"(ENSG[0-9]*)")
                intact_interactors = set()
                with open(target_path, encoding='utf-8') as csvfile:
                    reader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
                    for row in reader:
                        line = '\t'.join(row)
                        # Literal substring test: same result as a regex search for the escaped id.
                        if parent_id in line:
                            intact_interactors.update(ensg_pattern.findall(line))

                self.assertEqual(len(pydgin_interactors), len(intact_interactors), "Interactors size equal")
 def test_ftp(self):
     ''' Fetch the EMBL README over FTP and expect a truthy download status. '''
     readme_url = 'ftp://ftp.ebi.ac.uk/pub/databases/embl/README'
     downloaded = FTPDownload.download(readme_url, '/tmp', 'ftp.test')
     self.assertTrue(downloaded, 'FTP download test')
 def test_ftp_mtime(self):
     ''' Expect FTPDownload.mtime to return a positive value for the EMBL README. '''
     readme_mtime = FTPDownload.mtime('ftp://ftp.ebi.ac.uk/pub/databases/embl/README')
     is_positive = readme_mtime > 0
     self.assertTrue(is_positive, 'FTP file/dir exists')
 def test_ftp_exists(self):
     ''' Expect exists() to be truthy for the EBI FTP root and falsy for a bogus path. '''
     root_found = FTPDownload.exists('ftp://ftp.ebi.ac.uk/')
     self.assertTrue(root_found, 'FTP file/dir exists')

     bogus_found = FTPDownload.exists('ftp://ftp.ebi.ac.uk/xxxx')
     self.assertFalse(bogus_found, 'FTP file/dir exists')