def classify_by_boat(indir, outdir, pstatus):
    """
    Copy the raw logger files found in `indir` into per-vessel folders.

    indir: input directory; every '*.csv' file directly inside it is examined
    outdir: output directory; it is created when it does not exist
    pstatus: telemetry_status file, read with rdm.read_telemetrystatus
    function:
        File names are parsed as <prefix>_<SN>_<YYYYMMDD>_<HHMMSS>.csv (GMT).
        A file belongs to a telemetry-status row when one of the row's
        comma-separated 'Lowell-SN' entries ends with the SN of the file name
        and the local date of the file lies inside the matching
        'logger_change' interval. Matching files are copied with zl.copyfile
        to <outdir>/<Boat>/<YYYYMM>/<file name>.
    notice: this code is suitable for matching data after 2000
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    df = rdm.read_telemetrystatus(pstatus)

    # Parse 'logger_change' once per row into a list of dates. The lists are
    # kept in a dict instead of being written back into the DataFrame, which
    # avoids chained assignment of list objects into a column.
    logger_change = df['logger_change']
    change_dates = {}
    for row, raw_dates, missing in zip(df.index, logger_change,
                                       logger_change.isnull()):
        if missing:
            continue
        # Entries longer than 4 characters are normalised with
        # zl.transform_date; shorter ones are kept untouched.
        change_dates[row] = [
            zl.transform_date(date) if len(date) > 4 else date
            for date in raw_dates.split(',')
        ]

    serials = df['Lowell-SN']
    serial_missing = serials.isnull()
    boats = df['Boat']

    for file in glob.glob(os.path.join(indir, '*.csv')):
        # os.path.basename works with the separator of the running platform,
        # unlike a hard-coded split on '\\'.
        fname = os.path.basename(file)
        stamp = fname.split('.')[0].split('_')
        time_str = stamp[2] + ' ' + stamp[3]
        # GMT time of the file name -> local date as 'YYYYMMDD'
        time_local = zl.gmt_to_eastern(time_str[0:4] + '-' + time_str[4:6] +
                                       '-' + time_str[6:8] + ' ' +
                                       time_str[9:11] + ':' + time_str[11:13] +
                                       ':' +
                                       time_str[13:15]).strftime("%Y%m%d")
        file_sn = fname.split('_')[1]  # SN as written in the file name
        month_dir = fname.split('_')[2][:6]  # 'YYYYMM' sub-folder

        for row, sn_field, sn_absent, boat in zip(df.index, serials,
                                                  serial_missing, boats):
            # A row without SN or without logger_change dates cannot match.
            if sn_absent or row not in change_dates:
                continue
            dates = change_dates[row]
            for j, serial in enumerate(sn_field.split(',')):
                # The file name may carry only the tail of the full SN.
                if not serial.endswith(file_sn):
                    continue
                # NOTE: as before, blanks are replaced in the whole
                # destination path (outdir included), not only the boat name.
                dstfile = os.path.join(outdir, boat, month_dir,
                                       fname).replace(' ', '_')
                try:
                    # The j-th SN is valid from the j-th change date up to the
                    # next change date (or open-ended for the last one).
                    if j < len(dates) - 1:
                        if dates[j] <= time_local <= dates[j + 1]:
                            zl.copyfile(file, dstfile)
                    else:
                        if dates[j] <= time_local:
                            zl.copyfile(file, dstfile)
                except KeyboardInterrupt:
                    sys.exit()
                except Exception:
                    # Best effort: report the file and go on with the others.
                    print(
                        'NOTE: ' + fname +
                        ' does not have all the info it needs like date of last change.'
                    )
                    print("Please check telemetry status for this probe.")
def match_tele_raw(
        input_dir,
        path_save,
        telemetry_status,
        start_time,
        end_time,
        telemetry_path='https://www.nefsc.noaa.gov/drifter/emolt.dat',
        accept_minutes_diff=20,
        acceptable_distance_diff=2,
        dpi=300):
    """
    Match raw logger files with telemetry records and summarise both per boat.

    input_dir: directory searched (through zl.list_all_files) for raw '.csv' files
    path_save: directory that receives '<start_time>_<end_time> statistics.csv'
    telemetry_status: telemetry_status file, read with read_telemetrystatus
    start_time, end_time: 'YYYY-MM-DD' strings; a telemetry record is kept when
        zl.local2utc(start) <= record time < zl.local2utc(end).
        Raw files are not filtered by this window.
    telemetry_path: location handed to read_telemetry
    accept_minutes_diff: largest accepted gap, in minutes, between the GMT time
        in a raw file name and the time of a telemetry record
    acceptable_distance_diff: largest accepted zl.dist() value between the last
        filtered position of a raw file and a telemetry record
    dpi: not used inside this function
    return:
        raw_dict: {boat: DataFrame('time','filename','mean_temp','mean_depth',
                  'mean_lat','mean_lon')}, one row per raw file, sorted by time
        tele_dict: {boat: DataFrame('time','mean_temp','mean_depth','mean_lat',
                   'mean_lon')}, one row per telemetry record in the window
        record_file_df: per-boat statistics (also written to the csv file)
        index: the 'Boat' column of the telemetry status
        start_time_local, end_time_local: the parsed start and end time
        path_save: returned unchanged
    notice:
        The program exits (sys.exit) when no telemetry record lies in the
        window or no raw '.csv' file is found. A raw file without a
        'vessel number' header line is reported and skipped.
    """
    # read the file of the telemetry_status
    telemetrystatus_df = read_telemetrystatus(telemetry_status)
    # record file: file / telemetry / match counters, min, max and average
    # differences of depth and temperature and the lat/lon bounding box
    record_file_df = telemetrystatus_df.loc[:, ['Boat', 'Vessel#']].reindex(
        columns=[
            'Boat', 'Vessel#', 'matched_number', 'file_number', 'tele_num',
            'max_diff_depth', 'min_diff_depth', 'average_diff_depth',
            'max_diff_temp', 'min_diff_temp', 'average_diff_temp',
            'sum_diff_depth', 'sum_diff_temp', 'min_lat', 'max_lat',
            'min_lon', 'max_lon'
        ],
        fill_value=None)
    start_time_local = datetime.strptime(start_time, '%Y-%m-%d')
    end_time_local = datetime.strptime(end_time, '%Y-%m-%d')

    file_lists = [
        path for path in zl.list_all_files(input_dir)
        if path.endswith('.csv')
    ]

    # download the telemetry and keep the records inside the time window;
    # the window limits do not change, so they are converted only once
    tele_df = read_telemetry(telemetry_path)
    window_start = zl.local2utc(start_time_local)
    window_end = zl.local2utc(end_time_local)
    tele_fixes = []  # (vessel_n, time, lon, lat, depth, temp)
    for year, month, day, hour, minute, vessel_n, lon, lat, depth, temp in zip(
            tele_df['year'], tele_df['month'], tele_df['day'],
            tele_df['Hours'], tele_df['minates'], tele_df['vessel_n'],
            tele_df['lon'], tele_df['lat'], tele_df['depth'],
            tele_df['temp']):
        tele_time = datetime.strptime(
            '{}-{}-{} {}:{}:00'.format(year, month, day, hour, minute),
            '%Y-%m-%d %H:%M:%S')
        if window_start <= tele_time < window_end:
            tele_fixes.append((vessel_n, tele_time, lon, lat, depth, temp))

    # whether the data of file and telemetry is exist
    if not tele_fixes and not file_lists:
        print(
            'please check the data website of telementry and the directory of raw_data is exist!'
        )
        sys.exit()
    elif not tele_fixes:
        print('please check the data website of telementry!')
        sys.exit()
    elif not file_lists:
        print('please check the directory raw_data is exist!')
        sys.exit()

    index = telemetrystatus_df['Boat']  # boat names, keys of both dicts
    vessel_numbers = telemetrystatus_df['Vessel#']
    raw_columns = [
        'time', 'filename', 'mean_temp', 'mean_depth', 'mean_lat', 'mean_lon'
    ]
    tele_columns = ['time', 'mean_temp', 'mean_depth', 'mean_lat', 'mean_lon']
    # Rows are collected in plain lists and turned into DataFrames once at the
    # end; DataFrame.append was removed from pandas and copied the whole frame
    # on every call.
    raw_rows = {boat: [] for boat in index}
    tele_rows = {boat: [] for boat in index}

    for path in file_lists:  # loop raw files
        fname = os.path.basename(path)
        header_df = zl.nrows_len_to(path, 2, name=['key',
                                                   'value'])  # only header
        data_df = zl.skip_len_to(path, 2)  # only data

        # keep the records deeper than 85% of the mean depth
        depth_column = data_df['Depth(m)']
        value_data_df = data_df.loc[depth_column > 0.85 * mean(depth_column)]
        # Drop the rows labelled 0 and 1 (original intent: give the
        # temperature sensor time to reach the real bottom temperature).
        # NOTE(review): replaces `.ix[2:]`, which is label based on an integer
        # index - confirm zl.skip_len_to returns a default integer index.
        value_data_df = value_data_df.loc[2:]
        # exclude gross errors: keep temperatures within 3 standard deviations
        temp_column = value_data_df['Temperature(C)']
        temp_mean = mean(temp_column)
        temp_std = std(temp_column)
        value_data_df = value_data_df.loc[
            (temp_column > temp_mean - 3 * temp_std)
            & (temp_column < temp_mean + 3 * temp_std)]
        value_data_df = value_data_df.reset_index(drop=True)

        # convert every position with cv.dm2dd
        converted = [
            cv.dm2dd(lat_raw, lon_raw) for lat_raw, lon_raw in zip(
                value_data_df['Lat'], value_data_df['Lon'])
        ]
        value_data_df['Lat'] = [pair[0] for pair in converted]
        value_data_df['Lon'] = [pair[1] for pair in converted]

        lats = value_data_df['Lat'].values
        lons = value_data_df['Lon'].values
        min_lat, max_lat = min(lats), max(lats)
        min_lon, max_lon = min(lons), max(lons)
        mean_lat = float(round(mean(lats), 4))
        mean_lon = float(round(mean(lons), 4))
        # the first remaining temperature is left out of the mean
        mean_temp = float(
            round(mean(value_data_df['Temperature(C)'].iloc[1:]), 2))
        mean_depth = float(
            abs(int(round(mean(value_data_df['Depth(m)'].values)))))

        # get the vessel number of the file; it is reset for every file so a
        # header without it can never reuse the number of the previous file
        vessel_number = None
        for key, value in zip(header_df['key'], header_df['value']):
            if key.lower() == 'vessel number':
                vessel_number = int(value)
                break
        if vessel_number is None:
            print('NOTE: ' + fname +
                  ' has no "vessel number" line in its header, file skipped.')
            continue

        # telemetry-status rows of this vessel (record_file_df shares the
        # same row labels)
        matching_rows = [
            row for row, number in zip(telemetrystatus_df.index,
                                       vessel_numbers)
            if number == vessel_number
        ]

        # count the raw files of the vessel and track min/max of lat and lon
        for row in matching_rows:
            if pd.isnull(record_file_df.at[row, 'file_number']):
                record_file_df.at[row, 'min_lat'] = min_lat
                record_file_df.at[row, 'max_lat'] = max_lat
                record_file_df.at[row, 'min_lon'] = min_lon
                record_file_df.at[row, 'max_lon'] = max_lon
                record_file_df.at[row, 'file_number'] = 1
            else:
                record_file_df.at[row, 'file_number'] = int(
                    record_file_df.at[row, 'file_number'] + 1)
                _widen_record_bounds(record_file_df, row, min_lat, max_lat,
                                     min_lon, max_lon)

        # time of the file: '<YYYYMMDD> <HHMMSS>' taken from the file name
        stamp = fname.split('.')[0].split('_')
        time_str = stamp[2] + ' ' + stamp[3]
        # GMT time to local time of file
        time_local = zl.gmt_to_eastern(time_str[0:4] + '-' + time_str[4:6] +
                                       '-' + time_str[6:8] + ' ' +
                                       time_str[9:11] + ':' + time_str[11:13] +
                                       ':' + time_str[13:15])
        time_gmt = datetime.strptime(time_str, "%Y%m%d %H%M%S")
        # the last filtered position is the one compared with the telemetry
        lat, lon = lats[-1], lons[-1]

        # write the data of the raw file to the boat(s) of this vessel
        for row in matching_rows:
            raw_rows[telemetrystatus_df.at[row, 'Boat']].append(
                (time_local, fname, mean_temp, mean_depth, mean_lat,
                 mean_lon))

        # count the successful matches and accumulate the differences of
        # temperature and depth; only the first row of the vessel is updated
        first_row = matching_rows[0] if matching_rows else None
        for vessel_n, tele_time, tele_lon, tele_lat, tele_depth, tele_temp in tele_fixes:
            if vessel_n.split('_')[1] != str(vessel_number):
                continue
            if not abs(tele_time - time_gmt) <= timedelta(
                    minutes=accept_minutes_diff):  # time match
                continue
            if not zl.dist(lat1=lat,
                           lon1=lon,
                           lat2=float(tele_lat),
                           lon2=float(tele_lon)
                           ) <= acceptable_distance_diff:  # distance match
                continue
            if first_row is None:
                continue
            diff_temp = round(mean_temp - float(tele_temp), 4)
            diff_depth = round(mean_depth - float(tele_depth), 4)
            _accumulate_record_diffs(record_file_df, first_row, diff_temp,
                                     diff_depth)

    # write 'time','mean_temp','mean_depth' of the telemetry to tele_rows and
    # count the telemetry records of every boat
    for vessel_n, tele_time, tele_lon, tele_lat, tele_depth, tele_temp in tele_fixes:
        tele_vessel = int(vessel_n.split('_')[1])
        for row, number in zip(telemetrystatus_df.index, vessel_numbers):
            if not tele_vessel == number:
                continue
            if pd.isnull(record_file_df.at[row, 'tele_num']):
                record_file_df.at[row, 'tele_num'] = 1
            else:
                record_file_df.at[
                    row, 'tele_num'] = record_file_df.at[row, 'tele_num'] + 1
            if pd.isnull(record_file_df.at[row, 'max_lat']):
                record_file_df.at[row, 'min_lat'] = tele_lat
                record_file_df.at[row, 'max_lat'] = tele_lat
                record_file_df.at[row, 'min_lon'] = tele_lon
                record_file_df.at[row, 'max_lon'] = tele_lon
            else:
                _widen_record_bounds(record_file_df, row, tele_lat, tele_lat,
                                     tele_lon, tele_lon)
            tele_rows[telemetrystatus_df.at[row, 'Boat']].append(
                (tele_time, float(tele_temp), float(tele_depth),
                 float(tele_lat), float(tele_lon)))
    print("finish the calculate of min_lat and min_lon!")

    # averages of the differences; empty counters become 0
    for row in record_file_df.index:
        matched = record_file_df.at[row, 'matched_number']
        if not pd.isnull(matched):
            record_file_df.at[row, 'average_diff_depth'] = round(
                record_file_df.at[row, 'sum_diff_depth'] / matched, 4)
            record_file_df.at[row, 'average_diff_temp'] = round(
                record_file_df.at[row, 'sum_diff_temp'] / matched, 4)
        else:
            record_file_df.at[row, 'matched_number'] = 0
        if pd.isnull(record_file_df.at[row, 'tele_num']):
            record_file_df.at[row, 'tele_num'] = 0
        if pd.isnull(record_file_df.at[row, 'file_number']):
            record_file_df.at[row, 'file_number'] = 0

    # build the per-boat tables; the raw files are sorted by time
    raw_dict = {}
    tele_dict = {}
    for boat in index:
        raw_dict[boat] = pd.DataFrame(
            raw_rows[boat],
            columns=raw_columns).sort_values(by=['time']).reset_index(
                drop=True)
        tele_dict[boat] = pd.DataFrame(tele_rows[boat], columns=tele_columns)

    record_file_df = record_file_df.drop(['sum_diff_depth', 'sum_diff_temp'],
                                         axis=1)
    # save the record file
    record_file_df.to_csv(path_save + '/' + start_time + '_' + end_time +
                          ' statistics.csv',
                          index=0)
    return raw_dict, tele_dict, record_file_df, index, start_time_local, end_time_local, path_save


def _widen_record_bounds(record_df, row, min_lat, max_lat, min_lon, max_lon):
    """Enlarge the lat/lon bounding box stored in `row` to cover the given extremes."""
    if record_df.at[row, 'min_lat'] > min_lat:
        record_df.at[row, 'min_lat'] = min_lat
    if record_df.at[row, 'max_lat'] < max_lat:
        record_df.at[row, 'max_lat'] = max_lat
    if record_df.at[row, 'min_lon'] > min_lon:
        record_df.at[row, 'min_lon'] = min_lon
    if record_df.at[row, 'max_lon'] < max_lon:
        record_df.at[row, 'max_lon'] = max_lon


def _accumulate_record_diffs(record_df, row, diff_temp, diff_depth):
    """Add one matched file/telemetry pair to the counters, sums and extremes of `row`."""
    if pd.isnull(record_df.at[row, 'matched_number']):
        # first match of this row: every statistic starts from this pair
        record_df.at[row, 'matched_number'] = 1
        for column in ('sum_diff_temp', 'max_diff_temp', 'min_diff_temp'):
            record_df.at[row, column] = diff_temp
        for column in ('sum_diff_depth', 'max_diff_depth', 'min_diff_depth'):
            record_df.at[row, column] = diff_depth
        return
    record_df.at[row, 'matched_number'] = int(
        record_df.at[row, 'matched_number'] + 1)
    record_df.at[
        row, 'sum_diff_temp'] = record_df.at[row, 'sum_diff_temp'] + diff_temp
    record_df.at[
        row,
        'sum_diff_depth'] = record_df.at[row, 'sum_diff_depth'] + diff_depth
    if record_df.at[row, 'max_diff_temp'] < diff_temp:
        record_df.at[row, 'max_diff_temp'] = diff_temp
    if record_df.at[row, 'min_diff_temp'] > diff_temp:
        record_df.at[row, 'min_diff_temp'] = diff_temp
    if record_df.at[row, 'max_diff_depth'] < diff_depth:
        record_df.at[row, 'max_diff_depth'] = diff_depth
    if record_df.at[row, 'min_diff_depth'] > diff_depth:
        record_df.at[row, 'min_diff_depth'] = diff_depth
                    date_logger_change[j]
                )  #use the transform_date(date) to fix the date
        telemetry_status_df['logger_change'][i] = date_logger_change
    else:
        continue
# NOTE(review): truncated top-level fragment. `input_dir` and
# `telemetry_status_df` are not defined in the visible part of the file, and
# the loop body stops after `len_SN` is computed - confirm against the
# complete script before relying on it.
# get the path and name of the files that need to be matched
file_lists = glob.glob(os.path.join(input_dir, '*.csv'))
# match the files
for file in file_lists:
    # build '<part 2> <part 3>' from the '_'-separated base name of the file
    # (the path is split on '/', so other path separators are not handled)
    time_str = file.split('/')[len(file.split('/')) - 1:][0].split(
        '.')[0].split('_')[2] + ' ' + file.split(
            '/')[len(file.split('/')) - 1:][0].split('.')[0].split('_')[3]
    # GMT time of the file name -> local date formatted as 'YYYYMMDD'
    time_local = zl.gmt_to_eastern(time_str[0:4] + '-' + time_str[4:6] + '-' +
                                   time_str[6:8] + ' ' + time_str[9:11] + ':' +
                                   time_str[11:13] + ':' +
                                   time_str[13:15]).strftime("%Y%m%d")

    # match the SN and date
    for i in range(len(telemetry_status_df)):
        if not telemetry_status_df['Lowell-SN'].isnull(
        )[i] and not telemetry_status_df['logger_change'].isnull(
        )[i]:  # only rows that have both an SN and a logger_change entry are examined
            for j in range(len(
                    telemetry_status_df['Lowell-SN'][i].split(','))):
                fname_len_SN = len(
                    file.split('/')[len(file.split('/')) - 1:][0].split('_')
                    [1])  # the length of the SN in the file name
                len_SN = len(
                    telemetry_status_df['Lowell-SN'][i].split(',')[j]
                )  # the length of the j-th SN in the 'Lowell-SN' column of the telemetry status
def classify_by_boat(input_dir, output_dir, telemetry_status_path_name):
    """
    Find out which boat produced each raw data file and copy the file into
    the folder of that boat.

    input_dir: directory whose '*.csv' files are examined
    output_dir: destination directory; created when missing. It has to be
        empty, otherwise a message is printed and the program exits.
    telemetry_status_path_name: telemetry_status file, read with
        read_telemetrystatus
    function:
        File names are parsed as <prefix>_<SN>_<YYYYMMDD>_<HHMMSS>.csv (GMT).
        A file belongs to a telemetry-status row when one of the row's
        comma-separated 'Lowell-SN' entries ends with the SN of the file name
        and the local date of the file lies inside the matching
        'logger_change' interval. Matching files are copied with zl.copyfile
        to <output_dir>/<Boat>/<file name>.
    notice: this code is suitable for matching data after 2000
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if os.listdir(output_dir):
        print('please input a empty directory!')
        sys.exit()

    # read the file of the telemetry_status
    df = read_telemetrystatus(telemetry_status_path_name)

    # Parse 'logger_change' once per row into a list of dates. The lists are
    # kept in a dict instead of being written back into the DataFrame, which
    # avoids chained assignment of list objects into a column.
    logger_change = df['logger_change']
    change_dates = {}
    for row, raw_dates, missing in zip(df.index, logger_change,
                                       logger_change.isnull()):
        if missing:
            continue
        # Entries longer than 4 characters are normalised with
        # zl.transform_date; shorter ones are kept untouched.
        change_dates[row] = [
            zl.transform_date(date) if len(date) > 4 else date
            for date in raw_dates.split(',')
        ]

    serials = df['Lowell-SN']
    serial_missing = serials.isnull()
    boats = df['Boat']

    for file in glob.glob(os.path.join(input_dir, '*.csv')):
        # os.path.basename works with the separator of the running platform,
        # unlike a hard-coded split on '/'.
        fname = os.path.basename(file)
        stamp = fname.split('.')[0].split('_')
        time_str = stamp[2] + ' ' + stamp[3]
        # GMT time of the file name -> local date as 'YYYYMMDD'
        time_local = zl.gmt_to_eastern(time_str[0:4] + '-' + time_str[4:6] +
                                       '-' + time_str[6:8] + ' ' +
                                       time_str[9:11] + ':' + time_str[11:13] +
                                       ':' +
                                       time_str[13:15]).strftime("%Y%m%d")
        file_sn = fname.split('_')[1]  # SN as written in the file name

        for row, sn_field, sn_absent, boat in zip(df.index, serials,
                                                  serial_missing, boats):
            # A row without SN or without logger_change dates cannot match.
            if sn_absent or row not in change_dates:
                continue
            dates = change_dates[row]
            for j, serial in enumerate(sn_field.split(',')):
                # The file name may carry only the tail of the full SN.
                if not serial.endswith(file_sn):
                    continue
                # Built from its parts, so an input_dir written with a
                # trailing separator can no longer leave the path unreplaced.
                dstfile = os.path.join(output_dir, boat, fname)
                try:
                    # The j-th SN is valid from the j-th change date up to the
                    # next change date (or open-ended for the last one).
                    if j < len(dates) - 1:
                        if dates[j] <= time_local <= dates[j + 1]:
                            zl.copyfile(file, dstfile)
                    else:
                        if dates[j] <= time_local:
                            zl.copyfile(file, dstfile)
                except Exception:
                    # Best effort: report and continue with the next entry.
                    # Ctrl-C and sys.exit() are no longer swallowed here.
                    print("please check the data of telemetry status!")