#//////////////////////////////////////////////////////////////////////////////

# ~ 3 ~
# Import Data

# Add the new release to the Patch table (DO THIS FIRST!)
# The record carries the id, type, date and description read from patchinfo.
rd3.add(
    entity='rd3_patch',
    data={
        _field: patchinfo.get(_field)
        for _field in ('id', 'type', 'date', 'description')
    },
)

# Import new organisations (ERNs could be imported here too if ever needed,
# but that is highly unlikely).
rd3.importData(entity='rd3_organisation', data=dtFrameToRecords(newOrgs))

# Convert every frame to a list of records before anything is sent to RD3.
rd3_subjects = dtFrameToRecords(data=subjects)
rd3_subjectInfo = dtFrameToRecords(data=subjectInfo)
rd3_samples = dtFrameToRecords(data=samples)
rd3_labinfo = dtFrameToRecords(data=labinfo)

# Import the records into the release tables, in the order
# subject -> subjectinfo -> sample -> labinfo.
for _suffix, _records in (
    ('subject', rd3_subjects),
    ('subjectinfo', rd3_subjectInfo),
    ('sample', rd3_samples),
    ('labinfo', rd3_labinfo),
):
    rd3.importData(entity=f'rd3_{patchinfo["name"]}_{_suffix}', data=_records)

# update portal
}

# Key both frames on 'subjectID', then join the summarized file data onto
# the subjects frame.
for _frame in (subjects, filesSummarized):
    _frame.key = 'subjectID'

subjects = subjects[:, :, dt.join(filesSummarized)]


#///////////////////////////////////////

# Import data into rd3_overview
statusMsg('Importing data....')

# Step 1: import the subject IDs on their own.
_subject_id_records = subjects['subjectID'].to_pandas().to_dict('records')
rd3.importData(entity='rd3_overview', data=_subject_id_records)

# Step 2: update the rows with the full data; NaN is replaced by None
# before the frame is converted to records.
_overview_frame = subjects.to_pandas().replace({np.nan:None})
overviewData = _overview_frame.to_dict('records')
rd3.updateRows(entity='rd3_overview', data=overviewData)


# update values
# rd3.updateColumn(
#     entity='rd3_overview',
#     attr='hasOnlyNovelOmics',
#     data=dtFrameToRecords(
#         data=subjects[:,['subjectID', 'hasOnlyNovelOmics']]
#     )
# )
# Import data into rd3_<release>_subject and rd3_<release>_subjectinfo
for dataset in subjectsByAnalysis:
    # The '_nrows' entry is skipped; every other key is imported.
    if dataset == '_nrows':
        continue

    _release = novelOmicsReleases[dataset]
    statusMsg('Importing subject data into', _release)

    _frame = subjectsByAnalysis[dataset]

    # Subject records: every column except typeOfAnalysis.
    rd3_subject = dtFrameToRecords(_frame[:, f[:].remove(f.typeOfAnalysis)])

    # Subjectinfo records: 'id' and 'subjectID' are both taken from the id
    # column, plus the patch column.
    _subjectinfo_columns = {
        'id': f.id,
        'subjectID': f.id,
        'patch': f.patch
    }
    rd3_subjectinfo = dtFrameToRecords(_frame[:, _subjectinfo_columns])

    # Import subjects first, then subjectinfo.
    rd3.importData(entity=f'{_release}_subject', data=rd3_subject)
    rd3.importData(entity=f'{_release}_subjectinfo', data=rd3_subjectinfo)

# ~ 6b ~
# Import rd3_<release>_sample
for dataset in samplesByAnalysis:
    # The '_nrows' entry is skipped; every other key is imported.
    if dataset == '_nrows':
        continue

    _release = novelOmicsReleases[dataset]
    statusMsg('Importing samples into', _release)

    # Sample records: every column except typeOfAnalysis.
    _frame = samplesByAnalysis[dataset]
    rd3_sample = dtFrameToRecords(_frame[:, f[:].remove(f.typeOfAnalysis)])
    rd3.importData(entity=f'{_release}_sample', data=rd3_sample)

# ~ 6c ~
        f['filepath'] for f in raw_ped
        if re.search(r'((.ped)|(.ped.cip))$', f['filename'])
    ]
    # Collect the 'filepath' of every raw_json entry whose 'filename' matches
    # the json pattern.
    # NOTE(review): the dot in r'(.json)$' is unescaped, so it matches any
    # character (e.g. a name ending in 'xjson') — confirm whether a literal
    # '.' was intended.
    json_files = [
        f['filepath'] for f in raw_json
        if re.search(r'(.json)$', f['filename'])
    ]
    # All paths to process in this pass: ped files first, then json files.
    cluster_files = ped_files + json_files

    # prep files: build one metadata record per path and append it to `files`
    for file in cluster_files:
        # Derive the folder (and from it the 'type') from the path suffix.
        # NOTE(review): the dots in both patterns are unescaped (match any
        # character). If neither pattern matches, `folder` keeps its value
        # from an earlier iteration (or is undefined if never assigned).
        if re.search(r'((.ped)|(.ped.cip))$', file): folder = '/ped/'
        if re.search(r'(.json)$', file): folder = '/phenopacket/'
        fileMetadata = {
            # basename of the 'filepath' of `dir`, which is set outside this
            # fragment
            'release': os.path.basename(dir['filepath']),
            'path': file,
            'name': os.path.basename(file),
            # folder with the slashes removed: 'ped' or 'phenopacket'
            'type': folder.replace('/', ''),
            # NOTE(review): datetime.now() is called without a timezone, yet
            # 'Z' is appended — presumably the timestamp is local time;
            # confirm that a UTC marker is correct here.
            'created': str(datetime.now()).replace(' ', 'T') + 'Z'
        }
        # The checksum comes from clustertools.md5sum, called with the path.
        fileMetadata['md5sum'] = clustertools.md5sum(path=fileMetadata['path'])
        files.append(fileMetadata)

# Filter files - remove duplicates: keep only the first record for each
# file name ('name' is the basename of the file path).
data = pd.DataFrame(files).drop_duplicates(subset='name',
                                           keep='first').to_dict('records')

# Import into RD3: call rd3.delete on the portal table, then import the
# de-duplicated records. The unfiltered `files` list used to be passed here,
# which left the de-duplication above without any effect.
rd3.delete('rd3_portal_cluster')
rd3.importData(entity='rd3_portal_cluster', data=data)